Análisis de informes mensuales de Rydoo

Descripción:

La fuente de datos estudiada son los informes mensuales que entrega el sistema Rydoo. Se estudian 10 meses, desde el 01/01/2022 hasta el 31/10/2022, con un total de 17.935 registros con código chileno.

Pre procesado

In [1]:
#Librerias

import numpy as np
from plotnine import *
from decimal import *
import pandas as pd
def _nine_decimals(value):
    """Format a float with nine digits after the decimal point."""
    return '%.9f' % value


# Render every float shown by pandas with fixed nine-decimal notation.
pd.set_option('display.float_format', _nine_decimals)
In [2]:
# Load data: read every report workbook and keep only the columns under study.
files = ['reporte_1-2022-10-2022.xlsx']
# Column labels are matched literally by .loc, trailing spaces included.
vars_name = [
    'Cantidad ',
    'Fecha de la transacción ',
    'Fecha de aprobación ',
    'Fecha de finalización ',
    'Trabajador ',
    'Fondo fijo',
    'Categoría ',
    'Proveedor ',
    'Proyecto ',
    'Tipo Documento',
    'Sucursal ',
]


def _read_report(file_name):
    """Read one workbook from data/ and keep only the columns in vars_name."""
    return pd.read_excel("data/" + file_name).loc[:, vars_name]


# Stack the per-file frames row-wise, in the order given by `files`.
df = pd.concat([_read_report(file_name) for file_name in files])
In [3]:
# Add calendar columns derived from the transaction date so records can be grouped.
transaction_date = df['Fecha de la transacción ']

df['Fecha_de_la_transacción_month'] = transaction_date.dt.month
df['Fecha_de_la_transacción_year'] = transaction_date.dt.year
df['Fecha_de_la_transacción_day'] = transaction_date.dt.day
df['Fecha_de_la_transacción_day_name'] = transaction_date.dt.day_name()

# 'YYYY-MM' label (zero-padded) and 'Y-M-D' label (built from the integer parts,
# so month and day are NOT zero-padded).
df['Fecha_de_la_transacción_year_month'] = transaction_date.dt.strftime('%Y-%m')
df['Fecha_de_la_transacción_year_month_day'] = (
    df['Fecha_de_la_transacción_year'].astype("string")
    + '-'
    + df['Fecha_de_la_transacción_month'].astype("string")
    + '-'
    + df['Fecha_de_la_transacción_day'].astype("string")
)

# Time elapsed between the transaction and its approval, also as whole days.
df['delta_Fecha_aprobación_transacción'] = df['Fecha de aprobación '] - transaction_date
df['delta_Fecha_aprobación_transacción_day_int'] = df['delta_Fecha_aprobación_transacción'].dt.days

# Month labels in chronological order, used as the categories of the month column.
# Index.sort_values() takes no column name: the previous call passed one
# positionally, where it was interpreted as `return_indexer` and produced a
# (sorted_index, indexer) tuple that then had to be unpacked with [0].
# Missing labels are dropped because a Categorical cannot have a null category.
month_labels = df['Fecha_de_la_transacción_year_month'].dropna().unique()
categories = pd.to_datetime(month_labels, format='%Y-%m').sort_values().strftime('%Y-%m')
df['Fecha_de_la_transacción_year_month'] = pd.Categorical(df['Fecha_de_la_transacción_year_month'], categories=categories, ordered=False)

# Weekday names in calendar order (Monday first) so plots do not sort them alphabetically.
categories = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df['Fecha_de_la_transacción_day_name'] = pd.Categorical(df['Fecha_de_la_transacción_day_name'], categories=categories, ordered=False)
In [4]:
# Filter by document type.
#
# Values found in the column (output of df['Tipo Documento'].unique()):
#   array(['CHL - Documento Exento', 'CHL - Boleta', 'CHL - Factura Afecta',
#          nan, 'CHL - Factura Exenta', 'ARG - Gastos de viajes - Interior',
#          'ARG - Gastos de automóviles', 'CHL - Boleta de Honorario',
#          'ECU - Documentos internos', 'BRA - Reembolsos de despesas',
#          'ESP - Gastos de viaje', 'PER - Gastos de viaje',
#          'ECU - Reembolsos de gastos / Gastos de viaje'], dtype=object)
#
# Keep only the records whose document type is one of the 'CHL - ...' values.
filter_list = [
    'CHL - Documento Exento',
    'CHL - Boleta',
    'CHL - Factura Afecta',
    'CHL - Factura Exenta',
    'CHL - Boleta de Honorario',
]

# Rows with a missing document type are dropped too, since NaN is not in the list.
df = df.loc[df['Tipo Documento'].isin(filter_list)]

Análisis sobre el conjunto completo

Monto

Monto total

In [9]:
# Monthly statistics of the amount column: describe() plus kurtosis and total.
# The groupby is built once and reused for the three aggregations.
monthly_amount = df.groupby(['Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = monthly_amount.describe()
# Each group is a Series, so the Series kurtosis is applied and the resulting
# Series is assigned directly (aligned on the month index).
stats['kurt'] = monthly_amount.apply(pd.Series.kurt)
# sum() takes no column name; the previous positional string was consumed as
# the `numeric_only` flag.
stats['sum'] = monthly_amount.sum()
stats = stats.reset_index()
# Display only: the rename is not assigned back, so `stats` keeps the original
# month column name used by the plotting cells.
stats.rename(columns={'Fecha_de_la_transacción_year_month':'month'})
Out[9]:
month count mean std min 25% 50% 75% max kurt sum
0 2022-01 1556.000000000 37911.463701799 123997.004852571 0.000000000 4790.000000000 13800.000000000 31552.750000000 2176000.000000000 138.933863060 58990237.520000003
1 2022-02 1457.000000000 32563.518894990 96492.883949955 0.000000000 5000.000000000 13600.000000000 31535.000000000 2490722.000000000 323.393381191 47445047.030000001
2 2022-03 1943.000000000 53545.485028307 362703.909635602 0.000000000 5700.000000000 15000.000000000 36100.000000000 9796093.000000000 544.095252310 104038877.409999996
3 2022-04 1774.000000000 46222.957220970 155646.749259334 0.000000000 5692.500000000 13994.000000000 37190.000000000 2766400.000000000 138.001916199 81999526.109999999
4 2022-05 1874.000000000 36104.556494130 95593.649228895 10.300000000 5950.000000000 14000.000000000 35000.000000000 1686587.000000000 128.799146126 67659938.870000005
5 2022-06 1826.000000000 46413.754874042 195348.257432519 0.000000000 5000.000000000 12875.000000000 34641.750000000 4767229.000000000 268.674871754 84751516.400000006
6 2022-07 1749.000000000 36038.207547170 140978.797285946 0.000000000 5000.000000000 13500.000000000 35500.000000000 4331814.000000000 539.469134240 63030825.000000000
7 2022-08 2083.000000000 37860.522376380 138523.808540887 100.000000000 5135.000000000 14350.000000000 35990.000000000 3349463.000000000 286.141865585 78863468.109999999
8 2022-09 1922.000000000 39423.673772112 158905.484509575 130.000000000 5000.000000000 13500.000000000 30237.500000000 3500000.000000000 250.296476936 75772300.989999995
9 2022-10 1751.000000000 69343.237190177 317732.880833097 0.000000000 6500.000000000 18000.000000000 43675.000000000 6116040.000000000 199.118563174 121420008.319999993
In [261]:
# Total amount per month, one horizontal bar per month.
# `stats` is already aggregated, so the bars use stat='identity' (heights taken
# as-is). stat='sum' is a counting stat that only added an unused size aesthetic.
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='sum'))
    + geom_bar(stat='identity', fill='#0C475B')
    + coord_flip()
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + theme_bw()
    + theme(legend_position="none")
)
Out[261]:
<ggplot: (139753355237)>
In [262]:
# Total amount per month, with the number of records encoded as fill colour.
# stat='identity' plots the pre-aggregated values directly; with no size
# aesthetic generated there is no size guide left to hide.
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='sum', fill='count'))
    + geom_bar(stat='identity')
    + coord_flip()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + theme_bw()
    + labs(fill="N° Registros")
)
Out[262]:
<ggplot: (139753708073)>
In [263]:
# Number of records per month, one horizontal bar per month.
# stat='identity' draws the pre-computed 'count' column as the bar length.
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='count'))
    + geom_bar(stat='identity', fill='#0C475B')
    + coord_flip()
    + xlab('Mes')
    + ylab('N° Registros')
    + theme_bw()
    + theme(legend_position="none")
)
Out[263]:
<ggplot: (139753995457)>
In [264]:
# Number of records per month, with the total amount encoded as fill colour.
# stat='identity' plots the pre-aggregated values directly; with no size
# aesthetic generated there is no size guide left to hide.
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='count', fill='sum'))
    + geom_bar(stat='identity')
    + coord_flip()
    + xlab('Mes')
    + ylab('N° registros')
    + theme_bw()
    + labs(fill="Monto (CLP)")
)
Out[264]:
<ggplot: (139754053218)>
In [268]:
# Record volume and total amount per month as sized points
# (author's note: this chart is hard to read).
(
    ggplot(
        stats,
        aes(x='count', y='Fecha_de_la_transacción_year_month', fill='sum'),
    )
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + theme_bw()
    + labs(fill="Monto (CLP)", size='N° registros')
)
Out[268]:
<ggplot: (139855150525)>
In [269]:
# Box plots of the individual amounts, one box per month (horizontal).
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_boxplot(colour="#0C475B", fill="lightskyblue")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + coord_flip()
    + theme_bw()
)
Out[269]:
<ggplot: (139855150858)>
In [8]:
# Violin plots of the individual amounts, one violin per month (horizontal).
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_violin(colour="#0C475B", fill="lightskyblue")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + coord_flip()
    + theme_bw()
)
Out[8]:
<ggplot: (172681670888)>
In [10]:
# Histogram of the amounts, one panel per month with independent axes.
(
    ggplot(df, aes(x='Cantidad '))
    + geom_histogram(bins=30, fill='#0C475B')
    + facet_wrap('Fecha_de_la_transacción_year_month', ncol=2, scales='free')
    + xlab('Monto (CLP)')
    + ylab('N° registros')
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=1,
        panel_spacing_x=0.5,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=12),
        strip_margin=-0.2,
        strip_background=element_blank(),
        axis_text_x=element_text(rotation=50, hjust=1),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[10]:
<ggplot: (172679896698)>
In [428]:
# Mean vs. median amount per month, drawn as side-by-side bars.
# Reshape to long format: one row per (month, statistic) pair.
mean_median = stats[['Fecha_de_la_transacción_year_month', 'mean', '50%']].rename(
    columns={'mean': 'Promedio', '50%': 'Mediana'}
)
stats_melt = pd.melt(
    mean_median,
    id_vars=['Fecha_de_la_transacción_year_month'],
    value_vars=['Promedio', 'Mediana'],
)
# The x aesthetic now names the month column exactly; the previous string had a
# trailing space and therefore did not match any column of stats_melt.
# No size aesthetic is mapped, so no size guide needs hiding.
(
    ggplot(stats_melt, aes(y='value', x='Fecha_de_la_transacción_year_month', fill='variable'))
    + geom_bar(stat='identity', position='dodge')
    + xlab('Mes')
    + ylab('Monto')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
    )
)
Out[428]:
<ggplot: (139743874799)>
In [356]:
# Daily record-volume statistics per month. In the describe() output, 'count' is
# the number of days with records and '50%' is the median number of records per day.
# NOTE(review): records are grouped by the exact transaction-date value; a per-day
# count assumes the column carries no time-of-day component — confirm.
records_per_day = df.groupby('Fecha de la transacción ')['Fecha de la transacción '].count()
count_by_day = records_per_day.rename("count").reset_index()
count_by_day['Fecha_de_la_transacción_year_month'] = count_by_day['Fecha de la transacción '].dt.strftime('%Y-%m')

# Note: this rebinds `stats` to the daily-volume summary.
stats = (
    count_by_day
    .groupby('Fecha_de_la_transacción_year_month')['count']
    .describe()
    .reset_index()
)
stats
Out[356]:
Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max
0 2022-01 30.000000000 51.866666667 29.088134011 2.000000000 18.000000000 62.000000000 71.750000000 97.000000000
1 2022-02 28.000000000 52.035714286 29.236560246 3.000000000 20.750000000 59.500000000 73.250000000 106.000000000
2 2022-03 31.000000000 62.677419355 31.368122988 5.000000000 46.000000000 66.000000000 84.500000000 115.000000000
3 2022-04 30.000000000 59.133333333 32.549943941 4.000000000 21.000000000 70.000000000 87.250000000 108.000000000
4 2022-05 31.000000000 60.451612903 33.617295856 4.000000000 23.500000000 69.000000000 86.500000000 117.000000000
5 2022-06 30.000000000 60.866666667 35.830764409 3.000000000 24.500000000 77.000000000 86.750000000 123.000000000
6 2022-07 31.000000000 56.419354839 32.288051653 3.000000000 21.000000000 69.000000000 80.000000000 101.000000000
7 2022-08 31.000000000 67.193548387 37.094850815 3.000000000 24.500000000 80.000000000 91.500000000 153.000000000
8 2022-09 30.000000000 64.066666667 39.647527499 2.000000000 18.750000000 78.500000000 89.500000000 121.000000000
9 2022-10 31.000000000 56.483870968 38.021810379 3.000000000 19.000000000 64.000000000 85.500000000 125.000000000
In [274]:
# Distribution of the daily record volume, one horizontal box per month.
# The y aesthetic names the 'count' column exactly; the previous 'count ' (with a
# trailing space) is not a column of count_by_day.
(
    ggplot(count_by_day, aes(x='Fecha_de_la_transacción_year_month', y='count'))
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Registros')
    + theme_bw()
    + coord_flip()
)
Out[274]:
<ggplot: (139744309318)>

Monto por Tipo de documento

In [535]:
# Amount statistics per document type and month (first 10 rows shown).
# The groupby is built once and reused for the three aggregations.
amount_by_doc_month = df.groupby(['Tipo Documento', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = amount_by_doc_month.describe()
# Each group is a Series, so the Series kurtosis is applied and the resulting
# Series is assigned directly (aligned on the group index).
stats['kurt'] = amount_by_doc_month.apply(pd.Series.kurt)
# sum() takes no column name; the previous positional string was consumed as
# the `numeric_only` flag.
stats['sum'] = amount_by_doc_month.sum()
stats = stats.reset_index()
stats.head(10)
Out[535]:
Tipo Documento Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max kurt sum
0 CHL - Boleta 2022-03 1.000000000 18000.000000000 NaN 18000.000000000 18000.000000000 18000.000000000 18000.000000000 18000.000000000 NaN 18000.000000000
1 CHL - Boleta 2022-04 1.000000000 14850.000000000 NaN 14850.000000000 14850.000000000 14850.000000000 14850.000000000 14850.000000000 NaN 14850.000000000
2 CHL - Boleta 2022-05 1.000000000 13265.000000000 NaN 13265.000000000 13265.000000000 13265.000000000 13265.000000000 13265.000000000 NaN 13265.000000000
3 CHL - Boleta 2022-06 5.000000000 8990.000000000 11740.017035763 1050.000000000 4700.000000000 4700.000000000 4700.000000000 29800.000000000 4.668625163 44950.000000000
4 CHL - Boleta 2022-07 20.000000000 36904.250000000 69628.901199880 2000.000000000 4700.000000000 5150.000000000 15000.000000000 245800.000000000 4.220933689 738085.000000000
5 CHL - Boleta 2022-08 29.000000000 14200.241379310 13512.394764108 600.000000000 4700.000000000 8920.000000000 20990.000000000 45300.000000000 0.095340057 411807.000000000
6 CHL - Boleta 2022-09 137.000000000 35600.818175182 72131.080002038 400.000000000 7000.000000000 14700.000000000 35000.000000000 693165.000000000 52.827116790 4877312.090000000
7 CHL - Boleta 2022-10 320.000000000 29155.312500000 31673.866655616 270.000000000 7450.000000000 19326.000000000 40000.000000000 220000.000000000 7.729252457 9329700.000000000
8 CHL - Boleta de Honorario 2022-09 6.000000000 53375.000000000 2547.106201162 50000.000000000 51700.000000000 53500.000000000 55000.000000000 56650.000000000 -1.722597415 320250.000000000
9 CHL - Boleta de Honorario 2022-10 10.000000000 40843.700000000 36335.202049956 3000.000000000 9750.000000000 35000.000000000 55000.000000000 109687.000000000 -0.272305811 408437.000000000
In [516]:
# Total amount per month, one panel per document type.
# Fixes: the x aesthetic names the month column exactly (the previous string had
# a trailing space), and stat='identity' plots the pre-aggregated 'sum' values
# directly, so no size guide has to be hidden.
(
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='identity', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[516]:
<ggplot: (139752344631)>
In [517]:
# Total amount per month and document type, with the record count as fill colour.
# Fixes: the x aesthetic names the month column exactly (the previous string had
# a trailing space), and stat='identity' plots the pre-aggregated values
# directly, so no size guide has to be hidden.
(
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month', fill='count'))
    + geom_bar(stat='identity')
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
    + labs(fill="N° Registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[517]:
<ggplot: (139750367518)>
In [536]:
# Number of records per month, one panel per document type.
# Fixes: the y axis shows record counts, so it is labelled 'N° registros'
# instead of 'Monto(CLP)'; the x aesthetic names the month column exactly (no
# trailing space); stat='identity' plots the pre-aggregated counts directly.
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='identity', fill='#0C475B')
    + xlab('Mes')
    + ylab('N° registros')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[536]:
<ggplot: (139750221095)>
In [537]:
# Number of records per month and document type, with the total amount as fill colour.
# Fixes: the y axis shows record counts, so it is labelled 'N° registros'
# instead of 'Monto(CLP)'; the x aesthetic names the month column exactly (no
# trailing space); stat='identity' plots the pre-aggregated counts directly.
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month', fill='sum'))
    + geom_bar(stat='identity')
    + xlab('Mes')
    + ylab('N° registros')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
    + labs(fill='Monto (CLP)')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[537]:
<ggplot: (139745949988)>
In [538]:
# Record volume and total amount per month, one panel per document type.
# The x aesthetic names the 'count' column exactly; the previous 'count ' (with a
# trailing space) is not a column of stats.
(
    ggplot(stats, aes(x='count', y='Fecha_de_la_transacción_year_month', fill='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
    + labs(fill="Monto (CLP)", size='N° registros')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[538]:
<ggplot: (139862025045)>
In [539]:
# Box plots of the amounts per month, one panel per document type.
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[539]:
<ggplot: (139865036731)>
In [540]:
# Violin plots of the amounts per month, one panel per document type.
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[540]:
<ggplot: (139865818058)>
In [541]:
# Mean vs. median amount per month, one panel per document type.
# Reshape to long format: one row per (month, document type, statistic).
mean_median = stats[['Fecha_de_la_transacción_year_month', 'Tipo Documento', 'mean', '50%']].rename(
    columns={'mean': 'Promedio', '50%': 'Mediana'}
)
stats_melt = pd.melt(
    mean_median,
    id_vars=['Fecha_de_la_transacción_year_month', 'Tipo Documento'],
    value_vars=['Promedio', 'Mediana'],
)
# Fixes: the x aesthetic names the month column exactly (the previous string had
# a trailing space), and stat='identity' plots the already-computed statistics
# directly, so no size guide has to be hidden.
(
    ggplot(stats_melt, aes(y='value', x='Fecha_de_la_transacción_year_month', fill='variable'))
    + geom_bar(stat='identity', position='dodge')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[541]:
<ggplot: (139866235721)>

Monto por Fondo fijo / Anticipo / Reembolso

In [542]:
# Amount statistics per 'Fondo fijo' value and month (first 10 rows shown).
# The groupby is built once and reused for the three aggregations.
amount_by_fund_month = df.groupby(['Fondo fijo', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = amount_by_fund_month.describe()
# Each group is a Series, so the Series kurtosis is applied and the resulting
# Series is assigned directly (aligned on the group index).
stats['kurt'] = amount_by_fund_month.apply(pd.Series.kurt)
# sum() takes no column name; the previous positional string was consumed as
# the `numeric_only` flag.
stats['sum'] = amount_by_fund_month.sum()
stats = stats.reset_index()
stats.head(10)
Out[542]:
Fondo fijo Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max kurt sum
0 ANTICIPO (VxR) 2022-01 57.000000000 327960.140350877 489657.449180162 936.000000000 34400.000000000 91105.000000000 500000.000000000 2176000.000000000 4.636796217 18693728.000000000
1 ANTICIPO (VxR) 2022-02 49.000000000 170791.530612245 294723.434672526 1100.000000000 12450.000000000 21277.000000000 175104.000000000 1190000.000000000 4.444505249 8368785.000000000
2 ANTICIPO (VxR) 2022-03 166.000000000 330403.740963855 1193743.781590327 800.000000000 8775.000000000 30167.500000000 135596.750000000 9796093.000000000 46.991017388 54847021.000000000
3 ANTICIPO (VxR) 2022-04 131.000000000 233181.267175573 468314.384100152 0.000000000 19950.000000000 53700.000000000 198589.500000000 2766400.000000000 12.488317015 30546746.000000000
4 ANTICIPO (VxR) 2022-05 97.000000000 164735.371134021 295118.934899535 2600.000000000 19950.000000000 51250.000000000 126660.000000000 1686587.000000000 13.031007867 15979331.000000000
5 ANTICIPO (VxR) 2022-06 124.000000000 273423.572580645 651614.918934341 759.000000000 19712.250000000 56056.000000000 170098.750000000 4767229.000000000 23.137718164 33904523.000000000
6 ANTICIPO (VxR) 2022-07 58.000000000 264959.241379310 689402.328472045 550.000000000 3887.500000000 12479.000000000 155537.500000000 4331814.000000000 22.048295861 15367636.000000000
7 ANTICIPO (VxR) 2022-08 92.000000000 218918.565217391 550378.407254765 500.000000000 9800.000000000 38795.000000000 108420.000000000 3349463.000000000 17.314978794 20140508.000000000
8 ANTICIPO (VxR) 2022-09 38.000000000 538696.289473684 905344.869130346 8400.000000000 18637.500000000 129227.500000000 556115.750000000 3500000.000000000 3.466450326 20470459.000000000
9 ANTICIPO (VxR) 2022-10 108.000000000 461148.120370370 935080.230456866 7100.000000000 40337.500000000 115110.000000000 409002.000000000 5999544.000000000 16.489103411 49803997.000000000
In [543]:
# Total amount per month, one panel per 'Fondo fijo' value.
# Fixes: the x aesthetic names the month column exactly (the previous string had
# a trailing space), and stat='identity' plots the pre-aggregated 'sum' values
# directly, so no size guide has to be hidden.
(
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='identity', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[543]:
<ggplot: (139868252344)>
In [544]:
# Total amount per month and 'Fondo fijo' value, with the record count as fill colour.
# Fixes: the x aesthetic names the month column exactly (the previous string had
# a trailing space), and stat='identity' plots the pre-aggregated values
# directly, so no size guide has to be hidden.
(
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month', fill='count'))
    + geom_bar(stat='identity')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
    + labs(fill="N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[544]:
<ggplot: (139868533682)>
In [546]:
# Number of records per month, one panel per 'Fondo fijo' value.
# Fixes: the y axis shows record counts, so it is labelled 'N° registros'
# instead of 'Monto'; the x aesthetic names the month column exactly (no
# trailing space); stat='identity' plots the pre-aggregated counts directly.
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='identity', fill='#0C475B')
    + xlab('Mes')
    + ylab('N° registros')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[546]:
<ggplot: (139869073396)>
In [547]:
# Number of records per month and 'Fondo fijo' value, with the total amount as fill colour.
# Fixes: the y axis shows record counts, so it is labelled 'N° registros'
# instead of 'Monto'; the x aesthetic names the month column exactly (no
# trailing space); stat='identity' plots the pre-aggregated counts directly.
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month', fill='sum'))
    + geom_bar(stat='identity')
    + xlab('Mes')
    + ylab('N° registros')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
    + labs(fill="Monto (CLP)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[547]:
<ggplot: (139869217651)>
In [548]:
# Point chart: number of records and amount per month, one panel per 'Fondo fijo'.
# Point size follows 'count' and point fill follows 'sum'; the axes are swapped by coord_flip().
(
    ggplot(stats, aes(x='count ',y='Fecha_de_la_transacción_year_month', fill='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('Fondo fijo', ncol = 2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.7,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 10),
        strip_background=element_blank(),
    )
    + labs(fill = "Monto (CLP)", size = 'N° registros')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[548]:
<ggplot: (139869377317)>
In [549]:
# Box plot of the amount ('Cantidad ') per month, one panel per 'Fondo fijo'.
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month',y='Cantidad '))
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Fondo fijo', ncol = 2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.7,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[549]:
<ggplot: (139869522446)>
In [550]:
# Violin plot of the amount ('Cantidad ') per month, one panel per 'Fondo fijo'.
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month',y='Cantidad '))
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Fondo fijo', ncol = 2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.7,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[550]:
<ggplot: (139869887975)>
In [551]:
# Median vs. mean per month, one panel per 'Fondo fijo'.
# Reshape the 'mean' and '50%' columns to long format so both can be drawn as dodged bars.
id_columns = ['Fecha_de_la_transacción_year_month', 'Fondo fijo']
mean_median = stats[id_columns + ['mean', '50%']].rename(columns = {'mean':'Promedio', '50%':'Mediana'})
stats_melt = mean_median.melt(id_vars=id_columns, value_vars=['Promedio', 'Mediana'])
(
    ggplot(stats_melt, aes(y='value', x = 'Fecha_de_la_transacción_year_month ', fill='variable'))
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Fondo fijo', ncol = 2, scales='free')
    + theme_bw()
    + guides(size = False)  # hide the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.7,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[551]:
<ggplot: (139871213686)>

Monto por Sucursal

In [552]:
# Summary statistics of the amount ('Cantidad ') per 'Sucursal ' and month (first 5 rows shown).
# One groupby object is reused so that all three aggregations share the same grouping keys.
amount_by_group = df.groupby(['Sucursal ', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = amount_by_group.describe()
# Each group handed to apply() is a Series, so the Series reducer is the matching one;
# the resulting Series is aligned to `stats` through the shared group index.
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() on a single selected column takes no column argument (the first positional
# parameter is `numeric_only`, not a column name).
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats.head(5)
Out[552]:
Sucursal Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max kurt sum
0 GESTION INTEGRAL DE RESIDUOS SPA 2022-01 29.000000000 14757.379310345 16716.822441169 730.000000000 2200.000000000 7000.000000000 20990.000000000 59080.000000000 0.568508935 427964.000000000
1 GESTION INTEGRAL DE RESIDUOS SPA 2022-02 19.000000000 9596.300000000 10793.736293126 650.000000000 2200.000000000 8370.000000000 13700.000000000 46954.700000000 7.799474485 182329.700000000
2 GESTION INTEGRAL DE RESIDUOS SPA 2022-03 30.000000000 16061.966666667 13189.282363901 1300.000000000 5243.250000000 14700.000000000 23312.500000000 48410.000000000 -0.165472247 481859.000000000
3 GESTION INTEGRAL DE RESIDUOS SPA 2022-04 35.000000000 20914.542857143 18507.397006639 750.000000000 4750.000000000 15000.000000000 31480.500000000 70000.000000000 0.041396137 732009.000000000
4 GESTION INTEGRAL DE RESIDUOS SPA 2022-05 43.000000000 23956.000000000 32423.697072622 700.000000000 4500.000000000 14000.000000000 33465.000000000 188756.000000000 15.605371463 1030108.000000000
In [366]:
# Contribution of each 'Sucursal ' to the total amount per month (bars filled by 'Sucursal ').
(
    ggplot(stats, aes(y='sum', x = 'Fecha_de_la_transacción_year_month ', fill='Sucursal '))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + theme_bw()
    + guides(size = False)  # hide the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.7,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
    )
)
Out[366]:
<ggplot: (139744010781)>
In [554]:
# Plot: total amount per month, one panel per 'Sucursal '.
(
    ggplot(stats, aes(y='sum', x = 'Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Sucursal ', ncol = 2, scales='free')
    + theme_bw()
    + guides(size = False)  # hide the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[554]:
<ggplot: (139863311216)>
In [555]:
# Plot: total amount per month, one panel per 'Sucursal ', bars coloured by number of records.
(
    ggplot(stats, aes(y='sum', x = 'Fecha_de_la_transacción_year_month ', fill = 'count'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Sucursal ', ncol = 2, scales='free')
    + theme_bw()
    + guides(size = False)  # hide the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
    + labs(fill = "N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[555]:
<ggplot: (139856358773)>
In [557]:
# Plot: number of records per month, one panel per 'Sucursal '.
(
    ggplot(stats, aes(y='count', x = 'Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Sucursal ', ncol = 2, scales='free')
    + theme_bw()
    + guides(size = False)  # hide the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[557]:
<ggplot: (139741896464)>
In [558]:
# Plot: number of records per month, one panel per 'Sucursal ', bars coloured by total amount.
(
    ggplot(stats, aes(y='count', x = 'Fecha_de_la_transacción_year_month ', fill = 'sum'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Sucursal ', ncol = 2, scales='free')
    + theme_bw()
    + guides(size = False)  # hide the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
    + labs(fill = "Monto (CLP)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[558]:
<ggplot: (139739964486)>
In [560]:
# Point chart: number of records and amount per month, one panel per 'Sucursal '.
# Point size follows 'count' and point fill follows 'sum'; the axes are swapped by coord_flip().
(
    ggplot(stats, aes(x='count ',y='Fecha_de_la_transacción_year_month', fill='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('Sucursal ', ncol = 2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
    + labs(fill = "Monto (CLP)", size = 'N° registros')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[560]:
<ggplot: (139743571722)>
In [393]:
# Point chart of 'Sucursal ' against month in a single panel:
# point size follows 'count' and point fill follows 'sum'; the axes are swapped by coord_flip().
(
    ggplot(stats, aes(x='Sucursal ',y='Fecha_de_la_transacción_year_month', fill='sum'))
    + geom_point(aes(size='count'))
    + xlab('Sucursal')
    + ylab('Mes')
    + theme_bw()
    + coord_flip()
    + theme(axis_text_x=element_text(rotation=50, hjust=1))
    + labs(fill = "Monto (CLP)", size = 'N° registros')
)
Out[393]:
<ggplot: (139753868916)>
In [561]:
# Box plot of the amount ('Cantidad ') per month, one panel per 'Sucursal '.
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month',y='Cantidad '))
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Sucursal ', ncol = 2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[561]:
<ggplot: (139859481099)>
In [562]:
# Violin plot of the amount ('Cantidad ') per month, one panel per 'Sucursal '.
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month',y='Cantidad '))
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Sucursal ', ncol = 2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[562]:
<ggplot: (139871177566)>
In [563]:
# Median vs. mean per month, one panel per 'Sucursal '.
# Reshape the 'mean' and '50%' columns to long format so both can be drawn as dodged bars.
id_columns = ['Fecha_de_la_transacción_year_month', 'Sucursal ']
mean_median = stats[id_columns + ['mean', '50%']].rename(columns = {'mean':'Promedio', '50%':'Mediana'})
stats_melt = mean_median.melt(id_vars=id_columns, value_vars=['Promedio', 'Mediana'])
(
    ggplot(stats_melt, aes(y='value', x = 'Fecha_de_la_transacción_year_month ', fill='variable'))
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Sucursal ', ncol = 2, scales='free')
    + theme_bw()
    + guides(size = False)  # hide the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[563]:
<ggplot: (139869081919)>

Monto por Proyecto

In [565]:
# Summary statistics of the amount ('Cantidad ') per 'Proyecto ' and month (first 5 rows shown).
# One groupby object is reused so that all three aggregations share the same grouping keys.
amount_by_group = df.groupby(['Proyecto ', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = amount_by_group.describe()
# Each group handed to apply() is a Series, so the Series reducer is the matching one;
# the resulting Series is aligned to `stats` through the shared group index.
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() on a single selected column takes no column argument (the first positional
# parameter is `numeric_only`, not a column name).
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats.head(5)
Out[565]:
Proyecto Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max kurt sum
0 100 - Santiago (Quilicura) 2022-01 45.000000000 120545.711111111 331877.066313624 745.000000000 1200.000000000 11771.000000000 50330.000000000 1598711.000000000 14.946562088 5424557.000000000
1 100 - Santiago (Quilicura) 2022-02 22.000000000 55918.818181818 93097.727402899 745.000000000 7475.000000000 15500.000000000 57876.500000000 403350.000000000 9.489058858 1230214.000000000
2 100 - Santiago (Quilicura) 2022-03 39.000000000 258202.574358974 1063163.105397802 1340.000000000 9305.000000000 31238.000000000 117957.700000000 6678684.000000000 37.743016111 10069900.400000000
3 100 - Santiago (Quilicura) 2022-04 37.000000000 98248.675675676 269866.010419065 600.000000000 8104.000000000 22750.000000000 56450.000000000 1600819.000000000 28.405000347 3635201.000000000
4 100 - Santiago (Quilicura) 2022-05 42.000000000 55868.619047619 73025.105750027 800.000000000 8578.000000000 20925.000000000 63827.500000000 332700.000000000 4.113420906 2346482.000000000
In [566]:
# Top projects by total amount for each month (first 5 rows shown).
# Steps: sort the rows of every month by 'sum' in descending order, renumber all rows through
# a temporary 'Index' column that becomes the new index, then keep the first 10 rows per month.
# NOTE(review): range(len(stats)) assumes the apply() result has exactly as many rows as
# `stats` — confirm this holds if months can be missing or unobserved.
stats_top = stats.groupby('Fecha_de_la_transacción_year_month').apply(lambda x: x.sort_values(['sum'],ascending = False)).assign(Index=range(len(stats))).set_index('Index').groupby('Fecha_de_la_transacción_year_month').head(10)
stats_top.head(5)
Out[566]:
Proyecto Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max kurt sum
Index
0 100 - Santiago (Quilicura) 2022-01 45.000000000 120545.711111111 331877.066313624 745.000000000 1200.000000000 11771.000000000 50330.000000000 1598711.000000000 14.946562088 5424557.000000000
1 570 -Temuco Municipal 2022-01 23.000000000 235219.652173913 568115.863922029 2000.000000000 11925.000000000 24000.000000000 55000.000000000 2176000.000000000 7.859434367 5410052.000000000
2 550 - Puerto Montt 2022-01 125.000000000 39680.248000000 127222.840524187 200.000000000 5400.000000000 15000.000000000 34000.000000000 981750.000000000 39.785697239 4960031.000000000
3 300 - Vina del Mar 2022-01 101.000000000 23651.029702970 26324.282865999 800.000000000 7600.000000000 16898.000000000 34185.000000000 223600.000000000 32.640682194 2388754.000000000
4 402 - GERENCIA 2022-01 10.000000000 235344.800000000 537675.869926565 11000.000000000 23768.500000000 50317.000000000 51970.000000000 1742650.000000000 9.175458189 2353448.000000000
In [570]:
# Top projects by total amount, one panel per month.
# Build a "<project> - <month>" label for every row of the monthly top ranking.
project_month_label = stats_top['Proyecto '] + ' - ' + stats_top['Fecha_de_la_transacción_year_month'].astype('string')

# The category order is the reversed row order of stats_top
# (presumably so the largest bars end up on top after coord_flip() — verify on the figure).
categoria_list = project_month_label.tolist()[::-1]
categoria_cat = pd.Categorical(project_month_label, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat = categoria_cat)

(
    ggplot(stats_top_aux, aes(y='sum', x = 'categoria_cat'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Proyecto')
    + ylab('Monto(CLP)')
    + facet_wrap('Fecha_de_la_transacción_year_month', scales='free', ncol = 1)
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio = 0.5,
        strip_text_x = element_text(size = 10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[570]:
<ggplot: (139750221419)>
In [571]:
# Count how many rows of the monthly top ranking each project has, most frequent first.
appearances = stats_top.groupby('Proyecto ')['Proyecto '].count()
categoria_count = appearances.sort_values(ascending=False)
# Display as a two-column table: project and number of months in the top.
categoria_count.rename('Meses en top').reset_index()
Out[571]:
Proyecto Meses en top
0 PLACILLA -Transporte Ruta 10
1 100 - Santiago (Quilicura) 9
2 570 -Temuco Municipal 9
3 550 - Puerto Montt 9
4 500 - Concepcion 7
5 MARITIMO - Transporte Ruta 6
6 INDUSTRIAL - Transporte Ruta 6
7 300 - Vina del Mar 6
8 109 - Arauco 5
9 120 - Las Condes 5
10 SANTIAGO -Transporte Ruta 4
11 CONCEPCION -Transporte Ruta 4
12 TEMUCO -Transporte Ruta 3
13 170 - Penalolen Recoleccion 3
14 402 - GERENCIA 2
15 112 - Arauco Mapa 2
16 400 - Rancagua Industrial 2
17 106 - San Felipe 1
18 380-ESTRUCTURA (CASA MATRIZ) 1
19 180- La Reina Recoleccion 1
20 160 - Maipu Zona B Recoleccion 1
21 150 - Maipu Zona A Recoleccion 1
22 130 - Rancagua Recoleccion 1
23 PUERTO MONTT -Transporte Ruta 1
24 SANTIAGO -Tratamiento 1
In [572]:
# Projects that recur in the monthly top ranking: keep those that appear in the top
# for at least half of the months.
# Alternative criteria: a fixed minimum such as n_month = 2, or the number of distinct
# months (without dividing by 2) to require presence in the top every month.
n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique()) / 2

# Qualifying projects, in the order in which they appear in categoria_count.
top_projects = categoria_count[categoria_count >= n_month].index.to_list()

# .copy() makes the filtered frame independent of df, so the column assignment below does
# not write into a slice of df (this is what triggers pandas' SettingWithCopyWarning).
df_top_categorias = df[df['Proyecto '].isin(top_projects)].copy()
df_top_categorias['Proyecto '] = pd.Categorical(df_top_categorias['Proyecto '], categories=top_projects)

# Amount per month for each selected project, one panel per project.
(
    ggplot(df_top_categorias, aes(y='Cantidad ', x = 'Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Proyecto ', ncol = 1, scales='free')
    + coord_flip()
    + theme_bw()
    + guides(size = False)  # hide the size legend
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio = 0.5,
        strip_text_x = element_text(size = 10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[572]:
<ggplot: (139753653843)>
In [573]:
# Summary statistics of the amount ('Cantidad ') per 'Proyecto ' and month, restricted to the
# projects kept in df_top_categorias (first 5 rows shown).
# One groupby object is reused so that all three aggregations share the same grouping keys.
amount_by_group = df_top_categorias.groupby(['Proyecto ', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = amount_by_group.describe()
# Each group handed to apply() is a Series, so the Series reducer is the matching one;
# the resulting Series is aligned to `stats` through the shared group index.
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() on a single selected column takes no column argument (the first positional
# parameter is `numeric_only`, not a column name).
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats.head(5)
Out[573]:
Proyecto Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max kurt sum
0 PLACILLA -Transporte Ruta 2022-01 38.000000000 53731.894736842 77543.030164701 1400.000000000 9550.000000000 16855.000000000 69710.000000000 354724.000000000 5.311566664 2041812.000000000
1 PLACILLA -Transporte Ruta 2022-02 34.000000000 84421.764705882 80203.380774756 1000.000000000 42137.500000000 57500.000000000 91852.500000000 338300.000000000 2.886146866 2870340.000000000
2 PLACILLA -Transporte Ruta 2022-03 54.000000000 87056.000000000 130653.440522397 4080.000000000 16300.000000000 39500.000000000 106626.750000000 781900.000000000 16.363198385 4701024.000000000
3 PLACILLA -Transporte Ruta 2022-04 34.000000000 89087.117647059 65409.739816286 6000.000000000 47747.500000000 77575.000000000 125432.500000000 324667.000000000 4.275231995 3028962.000000000
4 PLACILLA -Transporte Ruta 2022-05 45.000000000 109292.000000000 238023.502942175 5500.000000000 35000.000000000 60000.000000000 98000.000000000 1619288.000000000 38.993021910 4918140.000000000
In [578]:
# Plot: total amount per month for the recurring top projects, one panel per 'Proyecto '.
# The y axis is the 'sum' column (amount), so it is labelled as an amount, not a record count.
(
    ggplot(stats, aes(y='sum', x = 'Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Proyecto ', ncol = 2, scales='free')
    + theme_bw()
    + guides(size = False)  # hide the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[578]:
<ggplot: (139750795255)>
In [575]:
# Plot: total amount per month for the recurring top projects, one panel per 'Proyecto ',
# bars coloured by number of records.
(
    ggplot(stats, aes(y='sum', x = 'Fecha_de_la_transacción_year_month ', fill = 'count'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Proyecto ', ncol = 2, scales='free')
    + theme_bw()
    + guides(size = False)  # hide the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
    + labs(fill = "N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[575]:
<ggplot: (139750513121)>
In [581]:
# Plot: number of records per month for the recurring top projects, one panel per 'Proyecto '.
(
    ggplot(stats, aes(y='count', x = 'Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Proyecto ', ncol = 2, scales='free')
    + theme_bw()
    + guides(size = False)  # hide the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[581]:
<ggplot: (139742609060)>
In [582]:
# Plot: number of records per month for the recurring top projects, one panel per
# 'Proyecto ', bars coloured by total amount.
# The y axis is the 'count' column and the fill is the 'sum' column, so the axis is
# labelled as a record count and the fill legend as an amount.
(
    ggplot(stats, aes(y='count', x = 'Fecha_de_la_transacción_year_month ', fill = 'sum'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Proyecto ', ncol = 2, scales='free')
    + theme_bw()
    + guides(size = False)  # hide the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
    + labs(fill = "Monto (CLP)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[582]:
<ggplot: (139750507124)>
In [583]:
# Box plot of the amount ('Cantidad ') per month, one panel per 'Proyecto ' (recurring top projects only).
(
    ggplot(df_top_categorias, aes(x='Fecha_de_la_transacción_year_month',y='Cantidad '))
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Proyecto ', ncol = 2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[583]:
<ggplot: (139860941248)>
In [584]:
# Violin plot of the amount ('Cantidad ') per month, one panel per 'Proyecto ' (recurring top projects only).
(
    ggplot(df_top_categorias, aes(x='Fecha_de_la_transacción_year_month',y='Cantidad '))
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Proyecto ', ncol = 2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y = 0.9,
        aspect_ratio = 0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x = element_text(size = 8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[584]:
<ggplot: (139743148888)>
In [585]:
# Median vs. mean amount per month, one panel per project.
# Reshape to long format: one row per (month, project, statistic).
stats_melt = (
    stats[['Fecha_de_la_transacción_year_month', 'Proyecto ', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'})
    .melt(
        id_vars=['Fecha_de_la_transacción_year_month', 'Proyecto '],
        value_vars=['Promedio', 'Mediana'],
    )
)
(
    # The x column is referenced by its exact name (no trailing space) so it
    # is looked up directly instead of being evaluated as an expression.
    ggplot(stats_melt, aes(x='Fecha_de_la_transacción_year_month', y='value', fill='variable'))
    # geom_col plots the values as they are; stat='sum' is a counting stat
    # that also adds a size aesthetic, which then had to be hidden.
    + geom_col(position='dodge')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Proyecto ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[585]:
<ggplot: (139862015328)>

Monto por Categoría

In [586]:
# Amount statistics per category and month (first 5 rows shown).
# Build the grouping once and reuse it for every statistic.
amount_by_group = df.groupby(['Categoría ', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = amount_by_group.describe()
# Each group is a Series, so use the Series kurtosis; groups that are too
# small yield NaN (see the displayed table).
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() takes no column argument here: the amount column is already selected.
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats.head(5)
Out[586]:
Categoría Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max kurt sum
0 ADITIVO BLUE MAX 2022-03 1.000000000 46500.000000000 NaN 46500.000000000 46500.000000000 46500.000000000 46500.000000000 46500.000000000 NaN 46500.000000000
1 ADITIVO BLUE MAX 2022-04 2.000000000 24800.000000000 20930.360723122 10000.000000000 17400.000000000 24800.000000000 32200.000000000 39600.000000000 NaN 49600.000000000
2 ADITIVO BLUE MAX 2022-06 1.000000000 5010.000000000 NaN 5010.000000000 5010.000000000 5010.000000000 5010.000000000 5010.000000000 NaN 5010.000000000
3 ADITIVO BLUE MAX 2022-10 1.000000000 10000.000000000 NaN 10000.000000000 10000.000000000 10000.000000000 10000.000000000 10000.000000000 NaN 10000.000000000
4 Abogados 2022-05 1.000000000 6000.000000000 NaN 6000.000000000 6000.000000000 6000.000000000 6000.000000000 6000.000000000 NaN 6000.000000000
In [587]:
# Top 10 categories by total amount within each month (first 5 rows shown).
stats_top = (
    stats
    # Order by month, then by descending total, in a single sort instead of
    # sorting inside groupby.apply (which depends on the grouping column
    # being passed through to the applied function).
    .sort_values(['Fecha_de_la_transacción_year_month', 'sum'], ascending=[True, False])
    # Running 0..n-1 position over the sorted frame, exposed as index 'Index'.
    .reset_index(drop=True)
    .rename_axis('Index')
    .groupby('Fecha_de_la_transacción_year_month')
    .head(10)
)
stats_top.head(5)
Out[587]:
Categoría Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max kurt sum
Index
0 Peajes 2022-01 201.000000000 50752.786069652 203617.453847157 200.000000000 1950.000000000 15000.000000000 42000.000000000 2176000.000000000 84.337639371 10201310.000000000
1 Manutencion - Colacion 2022-01 260.000000000 26383.076038462 51245.734643259 886.600000000 5075.000000000 10545.000000000 20462.250000000 405590.000000000 21.749169761 6859599.770000000
2 Impto veh. - P.circulacion - Rev. Tecnica - SOAP 2022-01 86.000000000 69221.186046512 123084.973561167 936.000000000 11800.000000000 18500.000000000 64500.000000000 617100.000000000 8.827482721 5953022.000000000
3 Repuestos y mantto equipos - Veh. pesados 2022-01 86.000000000 59375.802325581 154160.596802229 720.000000000 8810.000000000 18907.500000000 41650.000000000 981750.000000000 23.891108849 5106319.000000000
4 Peajes / Estacionamiento 2022-01 115.000000000 41536.591304348 108913.953434673 700.000000000 2900.000000000 6750.000000000 24925.000000000 749600.000000000 24.595371489 4776708.000000000
In [589]:
# Top categories by total amount, one panel per month.
# Each bar label is "<category> - <month>"; it is computed once and reused.
categoria_label = stats_top['Categoría '] + ' - ' + stats_top['Fecha_de_la_transacción_year_month'].astype('string')
# Reversed order so that, after coord_flip, the first row ends up on top.
categoria_list = categoria_label.tolist()[::-1]

categoria_cat = pd.Categorical(categoria_label, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)

(
    ggplot(stats_top_aux, aes(x='categoria_cat', y='sum'))
    # One row per bar, so draw the values directly with geom_col rather than
    # through the counting stat 'sum'.
    + geom_col(fill='#0C475B')
    + xlab('Categoría')
    + ylab('Monto(CLP)')
    + facet_wrap('Fecha_de_la_transacción_year_month', scales='free', ncol=1)
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[589]:
<ggplot: (139753960163)>
In [590]:
# Count in how many monthly top lists each category appears.
categoria_count = (
    stats_top
    .groupby('Categoría ')['Categoría ']
    .count()
    .sort_values(ascending=False)
)
# Show it as a two-column table: category, months in the top.
categoria_count.rename('Meses en top').reset_index()
Out[590]:
Categoría Meses en top
0 Combustibles - Diesel y gasolina 10
1 Peajes / Estacionamiento 10
2 Manutencion - Colacion 10
3 Peajes 10
4 Repuestos y mantto equipos - Veh. pesados 9
5 Impto veh. - P.circulacion - Rev. Tecnica - SOAP 7
6 Vehiculo priv-Uber, taxi, gastos traslados 6
7 Instalaciones - Mantencion 6
8 Mantenimiento Vehiculos 6
9 Otros gastos direct - Multas 5
10 Resto de coste indirecto - Multas 3
11 Alojamientos 3
12 Base operaciones - Arriendo 3
13 Impuestos- Notaria -Certif - Doc.Legal 3
14 Pasajes Aereos y terrestres al int (en Chile) 2
15 Otros - Correo / Fotocopia 1
16 Otros servicios de Asesorias 1
17 Materiales de oficina 1
18 Repuestos equipos y maquina 1
19 Gasolina 1
20 De instalac. Y obras (mant. instalaciones) 1
21 Neumaticos - Recauchajes 1
In [591]:
# Categories that recur in the monthly top according to a criterion
# (here: present in the top for at least half of the months).

#n_month = 2 # Criterion: in the top for at least 2 months
n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique()) / 2 # at least half of the months
#n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique()) # in the top every month

top_categorias = categoria_count[categoria_count >= n_month].index.to_list()

# .copy() makes the filtered frame independent of df, so the column
# assignment below does not write into a slice.
df_top_categorias = df[df['Categoría '].isin(top_categorias)].copy()

# Categorical keeps the panels in the order of categoria_count.
df_top_categorias['Categoría '] = pd.Categorical(df_top_categorias['Categoría '], categories=top_categorias)

# Total amount per category and month. Aggregating explicitly matters:
# stat='sum' collapses rows with the same (month, amount) pair into one, so
# stacked bars built from it leave out repeated amounts.
monto_mensual_categoria = (
    df_top_categorias
    .groupby(['Categoría ', 'Fecha_de_la_transacción_year_month'], observed=True)['Cantidad ']
    .sum()
    .reset_index()
)

(
    ggplot(monto_mensual_categoria, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_col(fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Categoría ', ncol=1, scales='free')
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[591]:
<ggplot: (139868743981)>
In [592]:
# Per-month amount statistics for the recurring top categories (first 5 rows shown).
# Build the grouping once and reuse it for every statistic.
amount_by_group = df_top_categorias.groupby(['Categoría ', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = amount_by_group.describe()
# Each group is a Series, so use the Series kurtosis.
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() takes no column argument here: the amount column is already selected.
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats.head(5)
Out[592]:
Categoría Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max kurt sum
0 Combustibles - Diesel y gasolina 2022-01 84.000000000 29576.488095238 42745.577354967 2800.000000000 5000.000000000 17819.500000000 37000.500000000 263471.000000000 15.079063649 2484425.000000000
1 Combustibles - Diesel y gasolina 2022-02 90.000000000 31272.700000000 48911.621483038 2000.000000000 5000.000000000 19010.000000000 29803.000000000 298737.000000000 16.002014184 2814543.000000000
2 Combustibles - Diesel y gasolina 2022-03 90.000000000 30672.111111111 62743.840831787 3000.000000000 5000.000000000 20000.000000000 30000.000000000 535639.000000000 49.657136142 2760490.000000000
3 Combustibles - Diesel y gasolina 2022-04 86.000000000 24785.267441860 25931.398326410 3000.000000000 9953.500000000 15000.000000000 30000.000000000 126035.000000000 5.245470202 2131533.000000000
4 Combustibles - Diesel y gasolina 2022-05 118.000000000 24245.454152542 27119.972963096 3000.000000000 7500.000000000 20000.000000000 27338.500000000 174438.000000000 10.761886579 2860963.590000000
In [593]:
# Total amount per month for each recurring top category.
(
    # Exact column name (no trailing space) for x.
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='sum'))
    # stats holds one row per bar, so the values are drawn directly.
    + geom_col(fill='#0C475B')
    + xlab('Mes')
    # y is the summed amount, not a record count.
    + ylab('Monto(CLP)')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[593]:
<ggplot: (139743302885)>
In [594]:
# Total amount per month for each recurring top category; bar colour encodes
# the number of records.
(
    # Exact column name (no trailing space) for x.
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='sum', fill='count'))
    # stats holds one row per bar, so the values are drawn directly.
    + geom_col()
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[594]:
<ggplot: (139751074744)>
In [595]:
# Number of records per month for each recurring top category.
(
    # Exact column name (no trailing space) for x.
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='count'))
    # stats holds one row per bar, so the values are drawn directly.
    + geom_col(fill='#0C475B')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[595]:
<ggplot: (139862037102)>
In [596]:
# Number of records per month for each recurring top category; bar colour
# encodes the total amount.
(
    # Exact column name (no trailing space) for x.
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='count', fill='sum'))
    # stats holds one row per bar, so the values are drawn directly.
    + geom_col()
    + xlab('Mes')
    # y is the record count and fill is the summed amount; the labels now
    # match what each aesthetic shows.
    + ylab('N° Registros')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Monto(CLP)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[596]:
<ggplot: (139740101194)>
In [597]:
# Box plot of transaction amounts per month, one panel per category.
(
    ggplot(
        df_top_categorias,
        aes(y='Cantidad ', x='Fecha_de_la_transacción_year_month'),
    )
    + geom_boxplot(fill="#658CB9", colour="#0C475B")
    + ylab('Monto(CLP)')
    + xlab('Mes')
    # scales='free' lets every panel pick its own axis ranges.
    + facet_wrap('Categoría ', scales='free', ncol=2)
    + theme_bw()
    + theme(
        aspect_ratio=0.6,
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        strip_background=element_blank(),
        strip_text_x=element_text(size=8),
        axis_text_x=element_text(hjust=1, rotation=50),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[597]:
<ggplot: (139740790148)>
In [598]:
# Violin plot of transaction amounts per month, one panel per category.
(
    ggplot(
        df_top_categorias,
        aes(y='Cantidad ', x='Fecha_de_la_transacción_year_month'),
    )
    + geom_violin(fill="#658CB9", colour="#0C475B")
    + ylab('Monto(CLP)')
    + xlab('Mes')
    # scales='free' lets every panel pick its own axis ranges.
    + facet_wrap('Categoría ', scales='free', ncol=2)
    + theme_bw()
    + theme(
        aspect_ratio=0.6,
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        strip_background=element_blank(),
        strip_text_x=element_text(size=8),
        axis_text_x=element_text(hjust=1, rotation=50),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[598]:
<ggplot: (139856707065)>
In [599]:
# Median vs. mean amount per month, one panel per category.
# Reshape to long format: one row per (month, category, statistic).
stats_melt = (
    stats[['Fecha_de_la_transacción_year_month', 'Categoría ', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'})
    .melt(
        id_vars=['Fecha_de_la_transacción_year_month', 'Categoría '],
        value_vars=['Promedio', 'Mediana'],
    )
)
(
    # The x column is referenced by its exact name (no trailing space) so it
    # is looked up directly instead of being evaluated as an expression.
    ggplot(stats_melt, aes(x='Fecha_de_la_transacción_year_month', y='value', fill='variable'))
    # geom_col plots the values as they are; stat='sum' is a counting stat
    # that also adds a size aesthetic, which then had to be hidden.
    + geom_col(position='dodge')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[599]:
<ggplot: (139862920072)>

Monto por Proveedor

In [866]:
# Amount statistics per provider and month (first 5 rows shown).
# Build the grouping once and reuse it for every statistic.
amount_by_group = df.groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = amount_by_group.describe()
# Each group is a Series, so use the Series kurtosis; groups that are too
# small yield NaN (see the displayed table).
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() takes no column argument here: the amount column is already selected.
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats.head(5)
Out[866]:
Proveedor Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max kurt sum
0 "1 JUZGADO POLICIA LOCAL MAIPU " 2022-10 3.000000000 317840.000000000 191571.122367125 178785.000000000 208582.500000000 238380.000000000 387367.500000000 536355.000000000 NaN 953520.000000000
1 '-- 2022-07 1.000000000 1200.000000000 NaN 1200.000000000 1200.000000000 1200.000000000 1200.000000000 1200.000000000 NaN 1200.000000000
2 '-- 2022-08 2.000000000 375295.000000000 529347.207464061 990.000000000 188142.500000000 375295.000000000 562447.500000000 749600.000000000 NaN 750590.000000000
3 '--- 2022-07 1.000000000 42317.000000000 NaN 42317.000000000 42317.000000000 42317.000000000 42317.000000000 42317.000000000 NaN 42317.000000000
4 '--- 2022-08 1.000000000 196162.000000000 NaN 196162.000000000 196162.000000000 196162.000000000 196162.000000000 196162.000000000 NaN 196162.000000000
In [868]:
# Top 10 providers by total amount within each month (first 10 rows shown).
stats_top = (
    stats
    # Order by month, then by descending total, in a single sort instead of
    # sorting inside groupby.apply (which depends on the grouping column
    # being passed through to the applied function).
    .sort_values(['Fecha_de_la_transacción_year_month', 'sum'], ascending=[True, False])
    # Running 0..n-1 position over the sorted frame, exposed as index 'Index'.
    .reset_index(drop=True)
    .rename_axis('Index')
    .groupby('Fecha_de_la_transacción_year_month')
    .head(10)
)
stats_top.head(10)
Out[868]:
Proveedor Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max kurt sum
Index
0 RUTA DE LA ARAUCANIA SOCIEDAD CONCESIONARIA S.A 2022-01 4.000000000 842300.000000000 617969.697207449 500000.000000000 500000.000000000 551400.000000000 893700.000000000 1766400.000000000 3.870873076 3369200.000000000
1 BYF CORREDORES DE SEGUROS LTDA 2022-01 22.000000000 149040.909090909 152476.807476356 16500.000000000 46950.000000000 86000.000000000 203625.000000000 617100.000000000 3.253351655 3278900.000000000
2 Ruta de la Araucanía 2022-01 2.000000000 1355800.000000000 1159937.963858413 535600.000000000 945700.000000000 1355800.000000000 1765900.000000000 2176000.000000000 NaN 2711600.000000000
3 PEAJES 2022-01 14.000000000 127977.142857143 97321.271643386 8500.000000000 33175.000000000 131870.000000000 191932.500000000 286330.000000000 -1.156886909 1791680.000000000
4 soc. y turismo cascada de las Animas 2022-01 1.000000000 1742650.000000000 NaN 1742650.000000000 1742650.000000000 1742650.000000000 1742650.000000000 1742650.000000000 NaN 1742650.000000000
5 REEMBOLSO PARA ASESORIAS JURIDICAS Y PROFESION... 2022-01 1.000000000 1598711.000000000 NaN 1598711.000000000 1598711.000000000 1598711.000000000 1598711.000000000 1598711.000000000 NaN 1598711.000000000
6 JOSE SANTOS 2022-01 3.000000000 530343.333333333 396300.969760778 73780.000000000 402815.000000000 731850.000000000 758625.000000000 785400.000000000 NaN 1591030.000000000
7 COPEC 2022-01 57.000000000 27576.210526316 32635.294681053 2000.000000000 9400.000000000 20000.000000000 37004.000000000 192180.000000000 14.739997056 1571844.000000000
8 VIATICOS 2022-01 2.000000000 780129.000000000 1017236.744347155 60834.000000000 420481.500000000 780129.000000000 1139776.500000000 1499424.000000000 NaN 1560258.000000000
9 BCI SEGUROS GENERALES 2022-01 1.000000000 996946.000000000 NaN 996946.000000000 996946.000000000 996946.000000000 996946.000000000 996946.000000000 NaN 996946.000000000
In [870]:
# Top providers by total amount, one panel per month.
# Each bar label is "<provider> - <month>"; it is computed once and reused.
categoria_label = stats_top['Proveedor '] + ' - ' + stats_top['Fecha_de_la_transacción_year_month'].astype('string')
# Reversed order so that, after coord_flip, the first row ends up on top.
categoria_list = categoria_label.tolist()[::-1]

categoria_cat = pd.Categorical(categoria_label, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)

(
    ggplot(stats_top_aux, aes(x='categoria_cat', y='sum'))
    # One row per bar, so draw the values directly with geom_col rather than
    # through the counting stat 'sum'.
    + geom_col(fill='#0C475B')
    + xlab('Proveedor')
    + ylab('Monto(CLP)')
    + facet_wrap('Fecha_de_la_transacción_year_month', scales='free', ncol=1)
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[870]:
<ggplot: (139879355323)>
In [604]:
# Count in how many monthly top lists each provider appears.
categoria_count = (
    stats_top
    .groupby('Proveedor ')['Proveedor ']
    .count()
    .sort_values(ascending=False)
)
# Show it as a two-column table: provider, months in the top.
categoria_count.rename('Meses en top').reset_index()
Out[604]:
Proveedor Meses en top
0 PEAJES 10
1 COPEC 9
2 ruta de la araucania soc. concesionaria s.a 6
3 Ruta del bosque sociedad concesionaria s.a 6
4 VIATICOS 4
5 JOSE SANTOS 4
6 Copec 4
7 MUNICIPALIDAD DE QUILICURA 3
8 I.M.QUILICURA 3
9 DANTE ANDRES AMIGO MOLINA 3
10 Veolia 2
11 Terminal Portuario De Valparaiso S.A. 2
12 JOSE LUIS SANTOS GONZALEZ 2
13 soc. y turismo cascada de las Animas 2
14 Esmax Red Limitada 1
15 GLADYS FABIOLA ILUFI 1
16 REEMBOLSO PARA ASESORIAS JURIDICAS Y PROFESION... 1
17 RUTA DE LA ARAUCANIA SOCIEDAD CONCESIONARIA S.A 1
18 Ruta de la Araucanía 1
19 Ruta del Bosque 1
20 CARTONI 1
21 SERVICIO AGRICOLA GANADERO 1
22 SERVICIOS GASTRONOMICOS ESTER 1
23 TERMINAL CERROS DE VALPARAISO SA 1
24 TERMINAL PORTUARIO DE VALPARAISO 1
25 Bci 1
26 Transporte chamorro e hijos spa 1
27 BYF CORREDORES DE SEGUROS LTDA 1
28 BCI SEGUROS GENERALES 1
29 W Boston Hotel 1
30 ATTON VITACURA SPA 1
31 ruta del bosque soc. concesionaria s.a 1
32 ruta del bosque sociedad consecionaria s.a 1
33 REEMBOLSO PARA ASESORIAS JURIDICAS Y PROFESION... 1
34 COMERCIAL Y SERVICIOS G Y F LTDA. 1
35 PATENTE MUNICIPAL 1
36 JOSÉ LUIS SANTOS GONZÁLEZ 1
37 I.M.QUILICDURA 1
38 Eco Portuaria de Residuos SPA 1
39 I.MUNICIPALIDAD QUILICURA CC 112 1
40 I.MUNICIPALIDAD QUILICURA CC 130 1
41 Ilustre Municipalidad de Requinoa 1
42 Cartoni Vehiculos Industriales Limitada 1
43 CURIFOR 1
44 José Santos González 1
45 Notario Santiago Andres Felipe Rieutord Alvarado 1
46 MAESTRANZA Y PROYECTOS DE INGENIERIA Y MONTAJE... 1
47 MUNICIPALIDAD D QUILICURA 1
48 1 Y 2 JUZGADO 1
49 MUNICIPALIDAD DE QUILICURA CC 100 1
50 MUNICIPALIDAD DE SAN FELIPE 1
51 MUNICIPALIDAD QUILICURA 1
52 Ministerio de Salud 1
53 1 JUZGADO POLICIA LOCAL MAIPU 1
In [605]:
# Providers that recur in the monthly top according to a criterion
# (here: present in the top for at least 2 months).

n_month = 2 # Criterion: in the top for at least 2 months
#n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique()) / 2 # at least half of the months
#n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique()) # in the top every month

top_proveedores = categoria_count[categoria_count >= n_month].index.to_list()

# .copy() makes the filtered frame independent of df, so the column
# assignment below does not write into a slice.
df_top_categorias = df[df['Proveedor '].isin(top_proveedores)].copy()

# Categorical keeps the panels in the order of categoria_count.
df_top_categorias['Proveedor '] = pd.Categorical(df_top_categorias['Proveedor '], categories=top_proveedores)

# Total amount per provider and month. Aggregating explicitly matters:
# stat='sum' collapses rows with the same (month, amount) pair into one, so
# stacked bars built from it leave out repeated amounts.
monto_mensual_proveedor = (
    df_top_categorias
    .groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month'], observed=True)['Cantidad ']
    .sum()
    .reset_index()
)

(
    ggplot(monto_mensual_proveedor, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_col(fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Proveedor ', ncol=1, scales='free')
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[605]:
<ggplot: (139753682386)>
In [784]:
# Per-month amount statistics for the recurring top providers (first 5 rows shown).
# Build the grouping once and reuse it for every statistic.
amount_by_group = df_top_categorias.groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = amount_by_group.describe()
# Each group is a Series, so use the Series kurtosis.
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() takes no column argument here: the amount column is already selected.
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats.head(5)
Out[784]:
Proveedor Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max kurt sum
0 PEAJES 2022-01 14.000000000 127977.142857143 97321.271643386 8500.000000000 33175.000000000 131870.000000000 191932.500000000 286330.000000000 -1.156886909 1791680.000000000
1 PEAJES 2022-02 26.000000000 93268.846153846 107457.669594196 4300.000000000 24655.000000000 38070.000000000 128475.000000000 403350.000000000 2.148855632 2424990.000000000
2 PEAJES 2022-03 43.000000000 109150.697674419 70188.884935930 6200.000000000 70400.000000000 99950.000000000 134025.000000000 351800.000000000 2.425672815 4693480.000000000
3 PEAJES 2022-04 36.000000000 108940.166666667 72995.463198945 3750.000000000 60204.500000000 103975.000000000 143326.000000000 269600.000000000 -0.111510536 3921846.000000000
4 PEAJES 2022-05 55.000000000 75799.345454545 73350.073709529 6450.000000000 21610.000000000 51250.000000000 99580.000000000 372400.000000000 6.027163088 4168964.000000000
In [785]:
# Total amount per month for each recurring top provider.
(
    # Exact column name (no trailing space) for x.
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='sum'))
    # stats holds one row per bar, so the values are drawn directly.
    + geom_col(fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[785]:
<ggplot: (139872382883)>
In [788]:
# Total amount per month for each recurring top provider; bar colour encodes
# the number of records.
(
    # Exact column name (no trailing space) for x.
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='sum', fill='count'))
    # stats holds one row per bar, so the values are drawn directly.
    + geom_col()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[788]:
<ggplot: (139863332555)>
In [609]:
# Number of records per month for each recurring top provider.
(
    # Exact column name (no trailing space) for x.
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='count'))
    # stats holds one row per bar, so the values are drawn directly.
    + geom_col(fill='#0C475B')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[609]:
<ggplot: (139751265695)>
In [789]:
# Number of records per month for each of the top providers; bar colour
# encodes the total amount for that month.
(
    ggplot(
        stats,
        # Column names must match the DataFrame exactly (no trailing space).
        aes(y='count', x='Fecha_de_la_transacción_year_month', fill='sum'),
    )
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° registros')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Monto (CLP)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[789]:
<ggplot: (139872343860)>
In [790]:
# Box plot of the transaction amount per month, one facet per provider.
(
    ggplot(
        df_top_categorias,
        aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[790]:
<ggplot: (139741146362)>
In [791]:
# Violin plot of the transaction amount per month, one facet per provider.
(
    ggplot(
        df_top_categorias,
        aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '),
    )
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[791]:
<ggplot: (139747279014)>
In [792]:
# Mean vs. median amount per month, one facet per provider.
# Reshape to long format so both statistics can be drawn as dodged bars:
# 'variable' holds the statistic name and 'value' its amount.
stats_melt = (
    stats[['Fecha_de_la_transacción_year_month', 'Proveedor ', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'})
    .melt(
        id_vars=['Fecha_de_la_transacción_year_month', 'Proveedor '],
        value_vars=['Promedio', 'Mediana'],
    )
)
(
    ggplot(
        stats_melt,
        # Column names must match the DataFrame exactly (no trailing space).
        aes(y='value', x='Fecha_de_la_transacción_year_month', fill='variable'),
    )
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[792]:
<ggplot: (139751251788)>

Tiempo

Tiempo total

In [721]:
# Summary statistics of the gap between consecutive transaction dates
# (dates sorted ascending, then differenced).
(
    df['Fecha de la transacción ']
    .sort_values()
    .diff()
    .describe()
    .to_frame()
)
Out[721]:
Fecha de la transacción
count 17934
mean 0 days 00:24:19.752425560
std 0 days 03:06:12.787722180
min 0 days 00:00:00
25% 0 days 00:00:00
50% 0 days 00:00:00
75% 0 days 00:00:00
max 2 days 00:00:00
In [843]:
# Summary statistics, per transaction month, of the delay between the
# transaction date and the approval date (kept as timedeltas).
stats = (
    df.groupby(['Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats
Out[843]:
Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max
0 2022-01 1555 18 days 21:10:57.197054662 24 days 19:34:25.215532252 0 days 12:50:17.203000 3 days 22:16:23.328500 9 days 14:58:33.800000 22 days 14:53:28.678500 228 days 21:13:48.960000
1 2022-02 1456 19 days 11:19:40.332569368 29 days 02:34:24.933876799 0 days 14:02:14.193000 3 days 14:27:01.956000 7 days 16:01:03.323500 21 days 13:11:15.115000 270 days 16:53:33.807000
2 2022-03 1940 17 days 10:52:43.102945360 24 days 11:28:06.874882260 -1 days +21:52:47.503000 3 days 00:08:24.157500 7 days 22:17:33.720000 20 days 11:44:42.670500 198 days 22:08:33.350000
3 2022-04 1769 15 days 00:54:28.449754663 19 days 07:15:59.656174233 -1 days +23:42:33.843000 3 days 15:45:14.700000 7 days 17:04:33.027000 19 days 16:23:36.283000 186 days 23:14:41.357000
4 2022-05 1872 14 days 08:10:16.572301816 19 days 19:42:17.009894342 0 days 14:13:59.123000 2 days 19:54:54.715000 6 days 18:28:19.267000 16 days 17:45:45.241000 162 days 19:47:10.223000
5 2022-06 1819 14 days 18:42:25.959337548 18 days 05:20:42.682629312 0 days 14:19:43.453000 3 days 21:14:01.320000 7 days 18:32:58.383000 17 days 14:59:20.353000 145 days 15:10:20.287000
6 2022-07 1741 14 days 08:23:38.146231476 16 days 11:17:39.340641971 0 days 08:42:10.987000 3 days 20:48:04.517000 7 days 18:54:53.960000 19 days 15:59:42.053000 119 days 21:01:24.317000
7 2022-08 2074 13 days 05:54:44.148643201 16 days 05:42:17.130599143 0 days 15:04:06.823000 3 days 15:04:10.416750 6 days 21:00:56.403500 14 days 18:29:41.061750 89 days 15:05:07.150000
8 2022-09 1903 11 days 19:54:49.191524960 11 days 09:30:28.702765606 0 days 16:03:17.837000 3 days 23:59:29.065000 7 days 13:56:15.163000 14 days 20:53:30.908500 67 days 15:42:43.480000
9 2022-10 1726 7 days 17:38:24.388206836 6 days 04:05:16.539321157 0 days 13:50:08.760000 3 days 00:02:42.713250 6 days 01:05:43.180000 10 days 16:54:13.847750 35 days 19:17:31.800000
In [845]:
# Median delay (in whole days) between transaction and approval, per month.
# `stats` is recomputed on the integer-day column so the values can be plotted.
# NOTE(review): `stats` is already aggregated, so geom_col() would state the
# intent more directly than geom_bar(stat='sum') - confirm before switching.
stats = (
    df.groupby(['Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)

(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='50%'))
    + geom_bar(stat='sum', fill='#0C475B')
    + coord_flip()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + theme_bw()
    + theme(legend_position="none")
)
Out[845]:
<ggplot: (139752120146)>
In [846]:
# Median approval delay per month (bar length), coloured by the number of
# records in that month.
(
    ggplot(
        stats,
        aes(x='Fecha_de_la_transacción_year_month', y='50%', fill='count'),
    )
    + geom_bar(stat='sum')
    + coord_flip()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + theme_bw()
    + guides(size=False)
    + labs(fill="N° registros")
)
Out[846]:
<ggplot: (139868888068)>
In [847]:
# Number of records per month (bar length), coloured by the median approval
# delay for that month.
(
    ggplot(
        stats,
        aes(x='Fecha_de_la_transacción_year_month', y='count', fill='50%'),
    )
    + geom_bar(stat='sum')
    + coord_flip()
    + xlab('Mes')
    + ylab('N° de registros')
    + theme_bw()
    + guides(size=False)
    + labs(fill="Tiempo (dias)")
)
Out[847]:
<ggplot: (139872368223)>
In [796]:
# Box plot of the approval delay (whole days between transaction and
# approval) per transaction month.
(
    ggplot(
        df,
        # Column names must match the DataFrame exactly (no trailing space).
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
        ),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + theme_bw()
    # No facets here, so the strip_* theme settings were dropped.
    + theme(axis_text_x=element_text(rotation=50, hjust=1))
)
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 80 rows containing non-finite values.
Out[796]:
<ggplot: (139866649006)>
In [797]:
# Violin plot of the approval delay (whole days between transaction and
# approval) per transaction month.
(
    ggplot(
        df,
        # Column names must match the DataFrame exactly (no trailing space).
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
        ),
    )
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + theme_bw()
    # No facets here, so the strip_* theme settings were dropped.
    + theme(axis_text_x=element_text(rotation=50, hjust=1))
)
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_ydensity : Removed 80 rows containing non-finite values.
Out[797]:
<ggplot: (139869294271)>
In [798]:
# Mean vs. median approval delay (days) per month. There is no provider
# breakdown here: `stats` is grouped by month only.
# Reshape to long format so both statistics can be drawn as dodged bars.
stats_melt = (
    stats[['Fecha_de_la_transacción_year_month', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'})
    .melt(
        id_vars=['Fecha_de_la_transacción_year_month'],
        value_vars=['Promedio', 'Mediana'],
    )
)
(
    ggplot(
        stats_melt,
        # Column names must match the DataFrame exactly (no trailing space).
        aes(y='value', x='Fecha_de_la_transacción_year_month', fill='variable'),
    )
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + theme_bw()
    + guides(size=False)
    # Single panel: the facet-only settings (panel spacing, strips) were dropped.
    + theme(
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
    )
)
Out[798]:
<ggplot: (139741668860)>

Tiempo por tipo de documento

In [848]:
# Summary statistics of the transaction-to-approval delay, grouped by
# document type and month (first 10 rows shown).
stats = (
    df.groupby(['Tipo Documento', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(10)
Out[848]:
Tipo Documento Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max
0 CHL - Boleta 2022-03 1 188 days 13:26:38.007000 NaT 188 days 13:26:38.007000 188 days 13:26:38.007000 188 days 13:26:38.007000 188 days 13:26:38.007000 188 days 13:26:38.007000
1 CHL - Boleta 2022-04 1 186 days 23:14:41.357000 NaT 186 days 23:14:41.357000 186 days 23:14:41.357000 186 days 23:14:41.357000 186 days 23:14:41.357000 186 days 23:14:41.357000
2 CHL - Boleta 2022-05 1 162 days 19:47:10.223000 NaT 162 days 19:47:10.223000 162 days 19:47:10.223000 162 days 19:47:10.223000 162 days 19:47:10.223000 162 days 19:47:10.223000
3 CHL - Boleta 2022-06 4 97 days 13:44:48.523500 16 days 15:46:46.064714821 88 days 13:51:18.990000 89 days 07:50:53.685000 89 days 13:58:45.588500 97 days 19:52:40.427000 122 days 13:10:23.927000
4 CHL - Boleta 2022-07 20 87 days 10:32:07.775200 12 days 15:26:21.387318070 70 days 13:39:14.393000 81 days 03:20:17.999750 82 days 03:20:44.356500 86 days 06:28:15.724000 116 days 19:39:16.480000
5 CHL - Boleta 2022-08 29 54 days 14:06:52.436586207 13 days 02:09:39.611182136 28 days 19:44:45.490000 48 days 03:20:44.307000 54 days 03:20:44.457000 64 days 13:39:14.380000 85 days 20:18:29.310000
6 CHL - Boleta 2022-09 135 19 days 06:48:08.177185185 14 days 18:57:52.318220474 1 days 16:46:56.587000 5 days 23:00:51.081500 14 days 13:50:08.770000 30 days 16:38:54.052000 62 days 12:58:06.420000
7 CHL - Boleta 2022-10 320 7 days 11:46:46.143328125 5 days 07:25:03.620069586 0 days 17:48:29.817000 3 days 18:09:26.590000 6 days 13:22:39.926500 9 days 17:43:27.669750 29 days 13:27:32.040000
8 CHL - Boleta de Honorario 2022-09 6 17 days 19:04:37.852666666 16 days 06:12:07.804726302 4 days 13:13:44.107000 8 days 19:13:41.348000 14 days 06:21:19.053000 16 days 10:14:52.883500 49 days 13:27:31.963000
9 CHL - Boleta de Honorario 2022-10 10 10 days 16:07:58.081000 7 days 07:50:11.388938659 1 days 16:25:19.957000 6 days 02:57:53.589500 9 days 02:23:45.365000 12 days 11:45:41.438250 24 days 21:43:49.493000
In [849]:
# Median approval delay (whole days) per month, one facet per document type.
# `stats` is recomputed on the integer-day column so the values can be plotted.
stats = (
    df.groupby(['Tipo Documento', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)

(
    ggplot(
        stats,
        # Column names must match the DataFrame exactly (no trailing space).
        aes(y='50%', x='Fecha_de_la_transacción_year_month'),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[849]:
<ggplot: (139872950762)>
In [803]:
# Median approval delay per month (bar height), coloured by the number of
# records, one facet per document type.
(
    ggplot(
        stats,
        # Column names must match the DataFrame exactly (no trailing space).
        aes(y='50%', x='Fecha_de_la_transacción_year_month', fill='count'),
    )
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° Registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[803]:
<ggplot: (139752930154)>
In [804]:
# Number of records per month (bar height), coloured by the median approval
# delay, one facet per document type.
(
    ggplot(
        stats,
        # Column names must match the DataFrame exactly (no trailing space).
        aes(y='count', x='Fecha_de_la_transacción_year_month', fill='50%'),
    )
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[804]:
<ggplot: (139872930621)>
In [809]:
# Record volume and median approval delay per month, one facet per document
# type: point size encodes the record count, fill the median delay in days.

(
    ggplot(
        stats,
        # Column names must match the DataFrame exactly (no trailing space).
        aes(x='count', y='Fecha_de_la_transacción_year_month', fill='50%'),
    )
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    # The x/y aesthetics are swapped on screen by coord_flip().
    + coord_flip()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)", size='N° registros')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[809]:
<ggplot: (139752887333)>
In [813]:
# Box plot of the approval delay (whole days) per month, one facet per
# document type.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
        ),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 80 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[813]:
<ggplot: (139865065067)>
In [814]:
# Violin plot of the approval delay (whole days) per month, one facet per
# document type. The y aesthetic is the day delta, not the amount.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
        ),
    )
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_ydensity : Removed 80 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[814]:
<ggplot: (139861959762)>
In [850]:
# Mean vs. median approval delay (days) per month, one facet per document type.
# Reshape to long format so both statistics can be drawn as dodged bars.
stats_melt = (
    stats[['Fecha_de_la_transacción_year_month', 'Tipo Documento', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'})
    .melt(
        id_vars=['Fecha_de_la_transacción_year_month', 'Tipo Documento'],
        value_vars=['Promedio', 'Mediana'],
    )
)
(
    ggplot(
        stats_melt,
        # Column names must match the DataFrame exactly (no trailing space).
        aes(y='value', x='Fecha_de_la_transacción_year_month', fill='variable'),
    )
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[850]:
<ggplot: (139861614091)>

Tiempo por Fondo fijo / Anticipo / Reembolso

In [851]:
# Summary statistics of the transaction-to-approval delay, grouped by
# 'Fondo fijo' value and month (first 10 rows shown).
stats = (
    df.groupby(['Fondo fijo', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(10)
Out[851]:
Fondo fijo Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max
0 ANTICIPO (VxR) 2022-01 57 42 days 12:18:29.786263158 42 days 07:01:22.614423353 0 days 19:58:53.307000 3 days 21:26:10.023000 21 days 15:24:56.447000 86 days 18:30:49.597000 112 days 18:31:03.363000
1 ANTICIPO (VxR) 2022-02 48 45 days 11:53:03.967416666 30 days 20:19:12.781808137 0 days 16:21:41.257000 5 days 15:59:18.485250 61 days 18:30:50.125000 69 days 02:38:27.969500 83 days 17:37:37.997000
2 ANTICIPO (VxR) 2022-03 165 19 days 08:35:53.049606060 20 days 23:44:49.244160100 0 days 16:48:36.133000 1 days 22:51:14.913000 12 days 19:15:32.710000 28 days 19:59:26.770000 126 days 23:37:35.993000
3 ANTICIPO (VxR) 2022-04 130 10 days 08:35:51.068430769 15 days 04:55:28.155007794 0 days 17:05:40.427000 1 days 23:31:45.774000 4 days 00:29:43.963000 12 days 08:55:05.496000 106 days 17:00:23.360000
4 ANTICIPO (VxR) 2022-05 96 7 days 00:14:06.055718750 16 days 05:25:24.551002415 0 days 18:51:46.613000 0 days 23:21:46.061750 1 days 15:59:55.620000 3 days 16:07:03.498000 85 days 17:03:57.133000
5 ANTICIPO (VxR) 2022-06 120 17 days 13:09:39.805350 20 days 22:27:19.819347349 0 days 15:59:27.717000 3 days 19:09:04.298500 7 days 17:34:00.488500 21 days 21:40:16.892000 80 days 22:47:48.973000
6 ANTICIPO (VxR) 2022-07 57 19 days 11:55:20.740631579 23 days 19:53:50.990367748 0 days 15:40:20.123000 4 days 17:22:37.470000 9 days 17:00:17.203000 21 days 21:01:59.157000 101 days 14:11:19.690000
7 ANTICIPO (VxR) 2022-08 90 5 days 15:06:44.713022222 8 days 01:50:56.447153996 0 days 16:59:50.297000 1 days 00:05:36.224250 3 days 14:21:31.158500 8 days 17:02:45.045500 53 days 13:37:09.197000
8 ANTICIPO (VxR) 2022-09 36 9 days 02:19:57.194500 6 days 20:50:40.054437503 0 days 21:26:11.893000 3 days 11:49:34.940000 7 days 10:49:34.060000 14 days 18:35:37.204250 30 days 02:29:38.807000
9 ANTICIPO (VxR) 2022-10 108 4 days 10:10:08.426481481 5 days 11:13:45.387557375 0 days 15:00:15.863000 1 days 16:37:23.112000 2 days 14:20:40.833000 5 days 14:52:20.294750 34 days 19:52:04.457000
In [852]:
# Median approval delay (approval date minus transaction date, in whole days)
# per month, one panel per "Fondo fijo".
stats = (
    df.groupby(['Fondo fijo', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)
(
    # The aes names must match the column names exactly (no trailing blanks).
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='50%'))
    # geom_col uses the precomputed median as the bar height; no counting stat
    # is involved, so there is no size legend to hide.
    + geom_col(fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[852]:
<ggplot: (139877711032)>
In [824]:
# Median approval delay (whole days) per month and "Fondo fijo"; the bar colour
# encodes the number of records behind each median.
# NOTE(review): uses the `stats` frame built by the preceding cell (describe()
# of the day-count delay grouped by "Fondo fijo" and month).
(
    # The aes names must match the column names exactly (no trailing blanks).
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='50%', fill='count'))
    # geom_col draws the precomputed value as the bar height.
    + geom_col()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° Registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[824]:
<ggplot: (139879453597)>
In [825]:
# Number of records per month and "Fondo fijo"; the bar colour encodes the
# median approval delay (whole days).
# NOTE(review): uses the `stats` frame built earlier (describe() of the
# day-count delay grouped by "Fondo fijo" and month).
(
    # The aes names must match the column names exactly (no trailing blanks).
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='count', fill='50%'))
    # geom_col draws the precomputed count as the bar height.
    + geom_col()
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[825]:
<ggplot: (139879534406)>
In [826]:
# Record volume per month and "Fondo fijo" as points: point size encodes the
# number of records, point fill the median approval delay (whole days).
# NOTE(review): uses the `stats` frame built earlier (describe() of the
# day-count delay grouped by "Fondo fijo" and month).
(
    # 'count' is referenced by its exact column name (no trailing blank).
    ggplot(stats, aes(x='count', y='Fecha_de_la_transacción_year_month', fill='50%'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    # coord_flip swaps the axes, so months end up on the horizontal axis.
    + coord_flip()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)", size='N° registros')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[826]:
<ggplot: (139879545160)>
In [827]:
# Box plot of the approval delay (whole days) per month, one panel per "Fondo fijo".
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
        ),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + labs(x='Mes', y='Tiempo (dias)')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 80 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[827]:
<ggplot: (139879976036)>
In [828]:
# Violin plot of the approval delay (whole days) per month, one panel per "Fondo fijo".
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
        ),
    )
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + labs(x='Mes', y='Tiempo (dias)')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_ydensity : Removed 80 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[828]:
<ggplot: (139880385786)>
In [853]:
# Mean vs. median approval delay (whole days) per month, one panel per "Fondo fijo".
# NOTE(review): uses the `stats` frame built earlier (describe() of the
# day-count delay grouped by "Fondo fijo" and month).
stats_melt = pd.melt(
    stats[['Fecha_de_la_transacción_year_month', 'Fondo fijo', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'}),
    id_vars=['Fecha_de_la_transacción_year_month', 'Fondo fijo'],
    value_vars=['Promedio', 'Mediana'],
)
(
    # pd.melt's default output columns are 'variable' (statistic name) and 'value'.
    # The aes names must match the column names exactly (no trailing blanks).
    ggplot(stats_melt, aes(x='Fecha_de_la_transacción_year_month', y='value', fill='variable'))
    # geom_col draws the precomputed values; dodge puts mean and median side by side.
    + geom_col(position='dodge')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[853]:
<ggplot: (139878471374)>

Tiempo por Sucursal

In [854]:
# Summary statistics of the approval delay (timedelta between approval and
# transaction dates) per "Sucursal " and month; the first 10 rows are displayed.
stats = (
    df.groupby(['Sucursal ', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(10)
Out[854]:
Sucursal Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max
0 GESTION INTEGRAL DE RESIDUOS SPA 2022-01 28 57 days 23:38:26.290428571 13 days 17:45:14.510641479 43 days 12:46:05.287000 50 days 18:54:36.242500 57 days 12:46:03.315000 59 days 19:45:46.077000 117 days 13:50:32.827000
1 GESTION INTEGRAL DE RESIDUOS SPA 2022-02 19 39 days 10:27:45.428736842 29 days 16:27:48.329952471 2 days 13:47:50.523000 7 days 13:47:43.631500 35 days 17:55:44.387000 54 days 17:55:44.350000 102 days 13:50:32.863000
2 GESTION INTEGRAL DE RESIDUOS SPA 2022-03 30 37 days 06:00:26.199200 31 days 05:24:19.004548532 5 days 14:21:29.950000 28 days 14:58:06.312500 35 days 17:28:04.481500 42 days 07:57:52.253500 188 days 14:48:12.293000
3 GESTION INTEGRAL DE RESIDUOS SPA 2022-04 35 33 days 12:03:53.701628571 46 days 07:24:17.980019026 0 days 19:57:52.253000 5 days 02:48:55.950000 22 days 13:50:32.833000 30 days 13:50:32.847000 154 days 17:58:06.410000
4 GESTION INTEGRAL DE RESIDUOS SPA 2022-05 43 22 days 04:23:36.475837209 25 days 07:03:21.698097645 1 days 13:50:32.893000 7 days 17:33:32.812000 17 days 13:50:04.303000 22 days 21:23:48.495000 130 days 17:59:06.610000
5 GESTION INTEGRAL DE RESIDUOS SPA 2022-06 31 32 days 11:21:37.081516129 21 days 00:09:50.005022663 0 days 21:17:01.363000 6 days 15:04:31.781500 41 days 14:03:13.327000 43 days 02:03:13.336500 94 days 22:38:25.903000
6 GESTION INTEGRAL DE RESIDUOS SPA 2022-07 34 31 days 02:06:18.513176470 14 days 09:59:56.946721544 6 days 14:57:48.287000 21 days 20:57:48.192500 30 days 14:40:26.686500 43 days 02:40:26.651750 76 days 17:57:27.143000
7 GESTION INTEGRAL DE RESIDUOS SPA 2022-08 26 35 days 15:54:04.329884615 15 days 18:30:51.527351213 3 days 14:40:26.867000 26 days 04:38:40.417000 33 days 23:00:41.316500 43 days 10:37:32.876750 85 days 20:18:29.310000
8 GESTION INTEGRAL DE RESIDUOS SPA 2022-09 45 20 days 22:08:18.546200 12 days 12:21:56.435051992 1 days 13:26:34.960000 12 days 22:38:26.290000 19 days 18:38:10.653000 31 days 13:51:58.933000 47 days 14:39:21.507000
9 GESTION INTEGRAL DE RESIDUOS SPA 2022-10 43 8 days 22:41:21.350465116 5 days 21:36:42.481133790 1 days 00:13:23.707000 4 days 06:55:24.408500 7 days 13:21:03.310000 14 days 01:52:34.205000 21 days 15:32:55.823000
In [855]:
# Median approval delay (approval date minus transaction date, in whole days)
# per month, one panel per "Sucursal ".
stats = (
    df.groupby(['Sucursal ', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)

(
    # The aes names must match the column names exactly (no trailing blanks).
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='50%'))
    # geom_col uses the precomputed median as the bar height; no counting stat
    # is involved, so there is no size legend to hide.
    + geom_col(fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[855]:
<ggplot: (139751054430)>
In [834]:
# Median approval delay (whole days) per month and "Sucursal "; the bar colour
# encodes the number of records behind each median.
# NOTE(review): uses the `stats` frame built by the preceding cell (describe()
# of the day-count delay grouped by "Sucursal " and month).
(
    # The aes names must match the column names exactly (no trailing blanks).
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='50%', fill='count'))
    # geom_col draws the precomputed value as the bar height.
    + geom_col()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° Registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[834]:
<ggplot: (139869304430)>
In [835]:
# Number of records per month and "Sucursal "; the bar colour encodes the
# median approval delay (whole days).
# NOTE(review): uses the `stats` frame built earlier (describe() of the
# day-count delay grouped by "Sucursal " and month).
(
    # The aes names must match the column names exactly (no trailing blanks).
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='count', fill='50%'))
    # geom_col draws the precomputed count as the bar height.
    + geom_col()
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[835]:
<ggplot: (139855331289)>
In [836]:
# Record volume per month and "Sucursal " as points: point size encodes the
# number of records, point fill the median approval delay (whole days).
# NOTE(review): uses the `stats` frame built earlier (describe() of the
# day-count delay grouped by "Sucursal " and month).
(
    # 'count' is referenced by its exact column name (no trailing blank).
    ggplot(stats, aes(x='count', y='Fecha_de_la_transacción_year_month', fill='50%'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    # coord_flip swaps the axes, so months end up on the horizontal axis.
    + coord_flip()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)", size='N° registros')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[836]:
<ggplot: (139741147395)>
In [837]:
# Box plot of the approval delay (whole days) per month, one panel per "Sucursal ".
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
        ),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + labs(x='Mes', y='Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 80 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[837]:
<ggplot: (139869284007)>
In [838]:
# Violin plot of the approval delay (whole days) per month, one panel per "Sucursal ".
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
        ),
    )
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + labs(x='Mes', y='Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_ydensity : Removed 80 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[838]:
<ggplot: (139869929689)>
In [856]:
# Mean vs. median approval delay (whole days) per month, one panel per "Sucursal ".
# NOTE(review): uses the `stats` frame built earlier (describe() of the
# day-count delay grouped by "Sucursal " and month).
stats_melt = pd.melt(
    stats[['Fecha_de_la_transacción_year_month', 'Sucursal ', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'}),
    id_vars=['Fecha_de_la_transacción_year_month', 'Sucursal '],
    value_vars=['Promedio', 'Mediana'],
)
(
    # pd.melt's default output columns are 'variable' (statistic name) and 'value'.
    # The aes names must match the column names exactly (no trailing blanks).
    ggplot(stats_melt, aes(x='Fecha_de_la_transacción_year_month', y='value', fill='variable'))
    # geom_col draws the precomputed values; dodge puts mean and median side by side.
    + geom_col(position='dodge')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[856]:
<ggplot: (139880470312)>

Tiempo por Categoría

In [877]:
# Summary statistics of the approval delay (timedelta between approval and
# transaction dates) per "Categoría " and month; the first 5 rows are displayed.
stats = (
    df.groupby(['Categoría ', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(5)
Out[877]:
Categoría Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max
0 ADITIVO BLUE MAX 2022-03 1 29 days 15:55:12.203000 NaT 29 days 15:55:12.203000 29 days 15:55:12.203000 29 days 15:55:12.203000 29 days 15:55:12.203000 29 days 15:55:12.203000
1 ADITIVO BLUE MAX 2022-04 2 2 days 06:52:34.051500 2 days 07:15:24.083663809 0 days 15:48:13.483000 1 days 11:20:23.767250 2 days 06:52:34.051500 3 days 02:24:44.335750 3 days 21:56:54.620000
2 ADITIVO BLUE MAX 2022-06 1 6 days 18:35:34.717000 NaT 6 days 18:35:34.717000 6 days 18:35:34.717000 6 days 18:35:34.717000 6 days 18:35:34.717000 6 days 18:35:34.717000
3 ADITIVO BLUE MAX 2022-10 1 1 days 21:33:58.767000 NaT 1 days 21:33:58.767000 1 days 21:33:58.767000 1 days 21:33:58.767000 1 days 21:33:58.767000 1 days 21:33:58.767000
4 Abogados 2022-05 1 19 days 20:20:31.667000 NaT 19 days 20:20:31.667000 19 days 20:20:31.667000 19 days 20:20:31.667000 19 days 20:20:31.667000 19 days 20:20:31.667000
In [878]:
# Top 10 categories per month ranked by median approval delay ('50%');
# the first 10 rows of the result are displayed.

# Sort each month's rows by descending median.
stats_top = stats.groupby('Fecha_de_la_transacción_year_month').apply(
    lambda month_rows: month_rows.sort_values(by=['50%'], ascending=False)
)
# Replace the index left by apply() with a flat positional one named 'Index'.
stats_top = stats_top.assign(Index=range(len(stats))).set_index('Index')
# Keep the first 10 rows of every month.
stats_top = stats_top.groupby('Fecha_de_la_transacción_year_month').head(10)
stats_top.head(10)
Out[878]:
Categoría Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max
Index
0 Repuestos equipos y maquinaria 2022-01 1 63 days 15:16:47.693000 NaT 63 days 15:16:47.693000 63 days 15:16:47.693000 63 days 15:16:47.693000 63 days 15:16:47.693000 63 days 15:16:47.693000
1 Materiales seguridad (Lentes, mascara, chaleco... 2022-01 2 49 days 01:50:07.913500 59 days 12:28:55.021935637 6 days 23:44:46.527000 28 days 00:47:27.220250 49 days 01:50:07.913500 70 days 02:52:48.606750 91 days 03:55:29.300000
2 Gastos de formacion y capacitaciones 2022-01 2 46 days 02:14:02.323500 20 days 11:59:50.033559569 31 days 14:20:21.840000 38 days 20:17:12.081750 46 days 02:14:02.323500 53 days 08:10:52.565250 60 days 14:07:42.807000
3 Resto de coste indirecto - Multas 2022-01 3 53 days 19:46:11.259000 33 days 02:43:15.739928576 32 days 14:20:48.307000 34 days 17:54:14.232000 36 days 21:27:40.157000 64 days 10:28:52.735000 91 days 23:30:05.313000
4 Alquiler estacionamiento 2022-01 6 42 days 20:08:46.656666666 39 days 13:24:02.406469751 5 days 16:30:34.813000 10 days 10:44:07.576500 31 days 01:57:42.697000 78 days 04:23:38.434250 91 days 17:34:46.850000
5 Transporte 2022-01 19 31 days 07:28:13.975789473 19 days 02:23:50.853082166 3 days 17:48:35.467000 20 days 03:15:52.956500 26 days 15:15:52.943000 44 days 18:13:38.367000 63 days 20:00:17.633000
6 Peajes / estacionamiento 2022-01 7 67 days 15:56:16.963142857 81 days 10:43:58.391147449 4 days 15:35:56.663000 10 days 14:12:17.493000 26 days 15:15:52.873000 96 days 03:31:52.630000 228 days 21:13:48.960000
7 Peajes (Estructura) 2022-01 71 35 days 03:24:32.117225352 34 days 06:55:34.085707294 1 days 15:02:04.650000 13 days 15:20:22.148000 26 days 01:05:43.193000 35 days 15:50:08.343500 118 days 03:55:29.213000
8 Resto de coste indirec-Atencion y Aportes Social 2022-01 4 37 days 06:16:25.966500 42 days 13:26:59.600107242 2 days 00:50:17.343000 7 days 18:55:45.878250 25 days 15:43:15.745000 55 days 03:03:55.833250 95 days 16:48:55.033000
9 Otros gastos direct - Multas 2022-01 4 23 days 01:28:03.363500 17 days 12:25:17.193104877 3 days 15:02:21.947000 10 days 14:01:07.067000 24 days 21:33:49.100000 37 days 09:00:45.396500 38 days 19:42:13.307000
In [926]:
# Top 10 categories per month by median approval delay (whole days),
# drawn as horizontal bars with one panel per month.
stats = df.groupby(['Categoría ', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción_day_int'].describe().reset_index()
stats_top = stats.groupby('Fecha_de_la_transacción_year_month').apply(lambda x: x.sort_values(['50%'],ascending = False)).assign(Index=range(len(stats))).set_index('Index').groupby('Fecha_de_la_transacción_year_month').head(10)

# One "<category> - <month>" label per bar, built once and reused below.
categoria_labels = stats_top['Categoría '] + ' - ' + stats_top['Fecha_de_la_transacción_year_month'].astype('string')
# Category order is the reverse of the ranking, presumably so the largest
# median ends up at the top of each panel after coord_flip().
categoria_list = categoria_labels.tolist()[::-1]

categoria_cat = pd.Categorical(categoria_labels, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)
(
    ggplot(stats_top_aux, aes(x='categoria_cat', y='50%'))
    # geom_col uses the precomputed median as the bar length.
    + geom_col(fill='#0C475B')
    + xlab('Categoría')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fecha_de_la_transacción_year_month', scales='free', ncol=1)
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[926]:
<ggplot: (139881653212)>
In [927]:
# Number of months in which each category appears in the monthly top list,
# most frequent first.
categoria_count = stats_top.groupby('Categoría ')['Categoría '].count()
categoria_count = categoria_count.sort_values(ascending=False)
# Display as a two-column table: category and its number of months in the top.
categoria_count.rename('Meses en top').reset_index()
Out[927]:
Categoría Meses en top
0 Peajes (Estructura) 8
1 Mantenim. Calderas y Quemadores 7
2 Viveres (Aarticulos de aseo) 5
3 Gastos de formacion y capacitaciones 4
4 Gastos Medicos 4
5 Materiales seguridad (Lentes, mascara, chaleco... 4
6 Guantes 4
7 Transporte 4
8 Resto de coste indirec-Atencion y Aportes Social 3
9 Mantencion de vehiculos 3
10 Peajes / estacionamiento 3
11 Formaciones y capacitaciones 3
12 Bolsas para limpieza 3
13 Alquiler estacionamiento 3
14 Repuestos equipos y maquinaria 3
15 Servicios Internet 2
16 Seguridad y Salud Ocupacional 2
17 Publicidad (Estructura) 2
18 Suscripciones y correo 2
19 Mantenimient Equipos oficina (TI) 2
20 Viveres (Articulos aseo, y despenza base) 2
21 Aceite Hidraulico 2
22 Gasoleo - Diesel y gasolina 2
23 Energia Electrica 2
24 Herramientas de taller 1
25 En serv. Locomocion publica 1
26 Aceite Motor 1
27 Base operaciones - Arriendo 1
28 Servicios de mensajeria 1
29 COSTO DE INTEGRACION SONATE 1
30 Compra de agua 1
31 Resto de coste indirecto - Multas 1
32 De instalac. Y obras (mant. instalaciones) 1
33 Diesel 1
34 Publicidad 1
35 Abogados 1
36 Gas y otros 1
37 Otros servicios de Asesorias 1
38 Otros mat. de impermeabilizacion 1
39 Materiales seguridad (Mascaras - lentes - guan... 1
40 Materiales de oficina - TI 1
41 Mantenimiento oficinas e instalaciones 1
42 Mantenimiento Vehiculos 1
43 Gasolina 1
44 ADITIVO BLUE MAX 1
In [928]:
# Categories that recur in the monthly top: keep the categories that appear in
# the monthly top list in at least `n_month` months and plot their approval
# delay (whole days) per month.

# Minimum number of months a category must appear in the monthly top.
n_month = 4
# Alternative thresholds:
#n_month = 2  # at least two months in the top
#n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique()) / 2  # at least half of the months
#n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique())  # in the top every month

# Qualifying categories, in the order of `categoria_count`.
top_categorias = categoria_count[categoria_count >= n_month].index.to_list()

# .copy() makes the filtered frame independent of `df`, so assigning a column
# below does not raise SettingWithCopyWarning.
df_top_categorias = df[df['Categoría '].isin(top_categorias)].copy()

# Categorical with the qualifying categories so the panels follow that order.
df_top_categorias['Categoría '] = pd.Categorical(df_top_categorias['Categoría '], categories=top_categorias)

# NOTE(review): stat='sum' is applied to raw rows here, not to a pre-aggregated
# table; it appears to collapse identical (month, delay) pairs before the bars
# are stacked, so the bar length is neither a plain total nor a median.
# Confirm which aggregate is intended.
(
    # The aes names must match the column names exactly (no trailing blanks).
    ggplot(df_top_categorias, aes(x='Fecha_de_la_transacción_year_month', y='delta_Fecha_aprobación_transacción_day_int'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Categoría ', ncol=1, scales='free')
    + coord_flip()
    + theme_bw()
    + guides(size=False)
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_sum : Removed 5 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[928]:
<ggplot: (139883091497)>
In [929]:
# Summary statistics of the approval delay (timedelta) per month for the
# recurrent top categories; the first 5 rows are displayed.
stats = (
    df_top_categorias
    .groupby(['Categoría ', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(5)
Out[929]:
Categoría Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max
0 Peajes (Estructura) 2022-01 71 35 days 03:24:32.117225352 34 days 06:55:34.085707294 1 days 15:02:04.650000 13 days 15:20:22.148000 26 days 01:05:43.193000 35 days 15:50:08.343500 118 days 03:55:29.213000
1 Peajes (Estructura) 2022-02 79 35 days 01:37:29.030240506 31 days 13:01:11.303943661 1 days 12:06:09.603000 13 days 20:38:08.403500 15 days 20:38:34.030000 60 days 10:25:48.743000 102 days 22:53:38.410000
2 Peajes (Estructura) 2022-03 47 38 days 05:00:27.629978723 21 days 19:53:06.432699643 3 days 14:28:38.920000 27 days 12:57:04.580000 38 days 23:16:31.800000 53 days 10:31:18.838000 92 days 19:14:05.997000
3 Peajes (Estructura) 2022-04 82 30 days 22:25:04.305024390 18 days 06:22:31.090776525 2 days 17:03:04.200000 17 days 17:52:22.337000 31 days 09:19:46.612000 41 days 22:11:49.782500 103 days 17:52:57.763000
4 Peajes (Estructura) 2022-05 97 36 days 14:45:22.203649484 24 days 20:34:49.770261797 3 days 15:33:14.493000 14 days 23:37:35.143000 39 days 03:20:38.250000 47 days 20:40:40.010000 111 days 13:43:07.960000
In [930]:
# Median approval delay (whole days) per month, one facet per category.
# stats is rebuilt here from the integer-day delta so '50%' is numeric.
stats = (
    df_top_categorias
    .groupby(['Categoría ', 'Fecha_de_la_transacción_year_month'])
    ['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)

(
    ggplot(stats, aes(y='50%', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_sum : Removed 1 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[930]:
<ggplot: (139883947063)>
In [903]:
# Median approval delay (days) per month and category; bar colour encodes the
# number of records behind each bar.
# x is mapped to the exact column name (no trailing space) so it does not
# depend on plotnine's expression-evaluation fallback.
(
    ggplot(stats, aes(y='50%', x='Fecha_de_la_transacción_year_month', fill='count'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_sum : Removed 1 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[903]:
<ggplot: (139740093879)>
In [905]:
# Number of records per month, one facet per category.
# x is mapped to the exact column name (no trailing space) so it does not
# depend on plotnine's expression-evaluation fallback.
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[905]:
<ggplot: (139752888158)>
In [908]:
# Number of records per month and category; bar colour encodes the median
# approval delay in days.
# x is mapped to the exact column name (no trailing space) so it does not
# depend on plotnine's expression-evaluation fallback.
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month', fill='50%'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[908]:
<ggplot: (139881046434)>
In [909]:
# Boxplot of the approval delay (whole days) per month, one facet per category.
(
    ggplot(
        df_top_categorias,
        aes(x='Fecha_de_la_transacción_year_month', y='delta_Fecha_aprobación_transacción_day_int'),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 5 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[909]:
<ggplot: (139880572691)>
In [931]:
# Violin plot of the approval delay (whole days) per month, one facet per category.
(
    ggplot(
        df_top_categorias,
        aes(x='Fecha_de_la_transacción_year_month', y='delta_Fecha_aprobación_transacción_day_int'),
    )
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_ydensity : Removed 5 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[931]:
<ggplot: (139883736917)>
In [933]:
# Median vs. mean approval delay per month, one facet per category.
# The 'mean' and '50%' columns of stats are reshaped to long form so both can
# be drawn as dodged bars distinguished by the 'variable' column.
stats_melt = pd.melt(
    stats[['Fecha_de_la_transacción_year_month', 'Categoría ', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'}),
    id_vars=['Fecha_de_la_transacción_year_month', 'Categoría '],
    value_vars=['Promedio', 'Mediana'],
)
# x uses the exact column name (no trailing space). The y label is the delay
# in days: the plotted values come from the day-delta statistics, not amounts.
(
    ggplot(stats_melt, aes(y='value', x='Fecha_de_la_transacción_year_month', fill='variable'))
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_sum : Removed 2 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[933]:
<ggplot: (139893425873)>

Tiempo por Proveedor

In [934]:
# Summary statistics of the approval-minus-transaction delta (kept as
# timedeltas), per provider and month (first 5 rows shown).
stats = (
    df
    .groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month'])
    ['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(5)
Out[934]:
Proveedor Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max
0 "1 JUZGADO POLICIA LOCAL MAIPU " 2022-10 2 2 days 14:20:40.748000 0 days 00:00:00.176776695 2 days 14:20:40.623000 2 days 14:20:40.685500 2 days 14:20:40.748000 2 days 14:20:40.810500 2 days 14:20:40.873000
1 '-- 2022-07 1 1 days 17:22:57.327000 NaT 1 days 17:22:57.327000 1 days 17:22:57.327000 1 days 17:22:57.327000 1 days 17:22:57.327000 1 days 17:22:57.327000
2 '-- 2022-08 2 6 days 05:50:51.938500 6 days 07:54:06.061709578 1 days 18:26:13.380000 4 days 00:08:32.659250 6 days 05:50:51.938500 8 days 11:33:11.217750 10 days 17:15:30.497000
3 '--- 2022-07 1 27 days 17:11:22.077000 NaT 27 days 17:11:22.077000 27 days 17:11:22.077000 27 days 17:11:22.077000 27 days 17:11:22.077000 27 days 17:11:22.077000
4 '--- 2022-08 1 1 days 15:38:36.550000 NaT 1 days 15:38:36.550000 1 days 15:38:36.550000 1 days 15:38:36.550000 1 days 15:38:36.550000 1 days 15:38:36.550000
In [940]:
# Top providers by median approval delay per month (first 5 rows shown).
# Per-provider, per-month describe() of the approval-minus-transaction delta in whole days.
stats = df.groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción_day_int'].describe().reset_index()
# Within each month, rows are ordered by the median ('50%') descending; the
# index left by groupby/apply is replaced with a positional 'Index'; then the
# first 10 rows of every month are kept.
# NOTE(review): assign(Index=range(len(stats))) assumes apply() returns exactly
# len(stats) rows — confirm this holds if month keys can be missing.
stats_top = stats.groupby('Fecha_de_la_transacción_year_month').apply(lambda x: x.sort_values(['50%'],ascending = False)).assign(Index=range(len(stats))).set_index('Index').groupby('Fecha_de_la_transacción_year_month').head(10)
stats_top.head(5)
Out[940]:
Proveedor Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max
Index
0 Ruta La Araucanía S.A. 2022-01 1.000000000 208.000000000 NaN 208.000000000 208.000000000 208.000000000 208.000000000 208.000000000
1 China King 2022-01 1.000000000 207.000000000 NaN 207.000000000 207.000000000 207.000000000 207.000000000 207.000000000
2 Claudio DOugnac 2022-01 1.000000000 184.000000000 NaN 184.000000000 184.000000000 184.000000000 184.000000000 184.000000000
3 CENCOSUD EASY 2022-01 1.000000000 140.000000000 NaN 140.000000000 140.000000000 140.000000000 140.000000000 140.000000000
4 Eco kleen car wash 2022-01 1.000000000 117.000000000 NaN 117.000000000 117.000000000 117.000000000 117.000000000 117.000000000
In [947]:
# Top providers by median approval delay, one horizontal-bar facet per month.
# Each bar is labelled "<provider> - <month>"; the category order is the
# reverse of the stats_top row order.
proveedor_mes_label = (
    stats_top['Proveedor ']
    + ' - '
    + stats_top['Fecha_de_la_transacción_year_month'].astype('string')
)
categoria_list = proveedor_mes_label.tolist()[::-1]

categoria_cat = pd.Categorical(proveedor_mes_label, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)

(
    ggplot(stats_top_aux, aes(y='50%', x='categoria_cat'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Proveedor')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fecha_de_la_transacción_year_month', scales='free', ncol=1)
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[947]:
<ggplot: (139892467974)>
In [948]:
# Count how many rows each provider has in the monthly top (stats_top),
# most frequent first, and show the result as a two-column table.
categoria_count = (
    stats_top
    .groupby('Proveedor ')['Proveedor ']
    .count()
    .sort_values(ascending=False)
)
categoria_count.to_frame('Meses en top').reset_index()
Out[948]:
Proveedor Meses en top
0 soc. c. de los lagos 2
1 ESMAX RED LIMITADA 2
2 Rodrigo Naranjo Alborta E.i.r.l 1
3 Parking time 1
4 Panda Oriental Ltda 1
... ... ...
93 Eco Kleen Car Wash Spa 1
94 Eco Kleen Car Wash 1
95 Eco Kleen Car Hash Spa 1
96 ESTACIONAMIENTOS JAVIER NOVOA ORTEGA EIRL 1
97 La Junta Ltda. 1

98 rows × 2 columns

In [946]:
# Providers that recur in the monthly top, according to a criterion
# (here: present in the top for at least n_month months).
# NOTE(review): this cell reads categoria_count, which is built by the
# "count of appearances in the top" cell — run that cell first.

n_month = 2  # criterion: at least 2 months in the top
#n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique()) / 2 # at least half of the months in the top
#n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique()) # in the top every month

# Computed once and reused for both the row filter and the category order.
top_proveedores = categoria_count[categoria_count >= n_month].index.to_list()

# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of df (avoids SettingWithCopyWarning).
df_top_categorias = df[df['Proveedor '].isin(top_proveedores)].copy()

df_top_categorias['Proveedor '] = pd.Categorical(df_top_categorias['Proveedor '], categories=top_proveedores)

# x uses the exact column name (no trailing space).
(
    ggplot(
        df_top_categorias,
        aes(y='delta_Fecha_aprobación_transacción_day_int', x='Fecha_de_la_transacción_year_month'),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Proveedor ', ncol=1, scales='free')
    + coord_flip()
    + theme_bw()
    + guides(size=False)
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[946]:
<ggplot: (139893405213)>
In [949]:
# Summary statistics of the approval delay (whole days), per provider and
# month, restricted to the recurring top providers in df_top_categorias.
stats = (
    df_top_categorias
    .groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month'])
    ['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)
stats.head(5)
Out[949]:
Proveedor Fecha_de_la_transacción_year_month count mean std min 25% 50% 75% max
0 soc. c. de los lagos 2022-02 4.000000000 195.750000000 4.991659711 192.000000000 192.750000000 194.000000000 197.000000000 203.000000000
1 soc. c. de los lagos 2022-03 1.000000000 148.000000000 NaN 148.000000000 148.000000000 148.000000000 148.000000000 148.000000000
2 soc. c. de los lagos 2022-05 1.000000000 5.000000000 NaN 5.000000000 5.000000000 5.000000000 5.000000000 5.000000000
3 soc. c. de los lagos 2022-06 1.000000000 7.000000000 NaN 7.000000000 7.000000000 7.000000000 7.000000000 7.000000000
4 soc. c. de los lagos 2022-07 5.000000000 3.600000000 1.516575089 2.000000000 2.000000000 4.000000000 5.000000000 5.000000000
In [951]:
# Median approval delay (days) per month, one facet per top provider.
# x is mapped to the exact column name (no trailing space) so it does not
# depend on plotnine's expression-evaluation fallback.
(
    ggplot(stats, aes(y='50%', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
Out[951]:
<ggplot: (139883934633)>
In [953]:
# Median approval delay (days) per month and top provider; bar colour encodes
# the number of records behind each bar.
# x is mapped to the exact column name (no trailing space).
(
    ggplot(stats, aes(y='50%', x='Fecha_de_la_transacción_year_month', fill='count'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
Out[953]:
<ggplot: (139883292495)>
In [956]:
# Number of records per month and top provider; bar colour encodes the median
# approval delay in days.
# x is mapped to the exact column name (no trailing space).
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month', fill='50%'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° registros')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
Out[956]:
<ggplot: (139882795774)>
In [957]:
# Boxplot of the approval delay (whole days) per month, one facet per top provider.
# The y axis is labelled as time in days: the mapped variable is the day
# delta, not an amount in CLP.
(
    ggplot(
        df_top_categorias,
        aes(x='Fecha_de_la_transacción_year_month', y='delta_Fecha_aprobación_transacción_day_int'),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
Out[957]:
<ggplot: (139882808339)>
In [958]:
# Violin plot of the approval delay (whole days) per month, one facet per top provider.
# The y axis is labelled as time in days: the mapped variable is the day
# delta, not an amount in CLP.
(
    ggplot(
        df_top_categorias,
        aes(x='Fecha_de_la_transacción_year_month', y='delta_Fecha_aprobación_transacción_day_int'),
    )
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
Out[958]:
<ggplot: (139877787132)>
In [959]:
# Median vs. mean approval delay per month, one facet per top provider.
# The 'mean' and '50%' columns of stats are reshaped to long form so both can
# be drawn as dodged bars distinguished by the 'variable' column.
stats_melt = pd.melt(
    stats[['Fecha_de_la_transacción_year_month', 'Proveedor ', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'}),
    id_vars=['Fecha_de_la_transacción_year_month', 'Proveedor '],
    value_vars=['Promedio', 'Mediana'],
)
# x uses the exact column name (no trailing space). The y label is the delay
# in days: the plotted values come from the day-delta statistics, not amounts.
(
    ggplot(stats_melt, aes(y='value', x='Fecha_de_la_transacción_year_month', fill='variable'))
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
Out[959]:
<ggplot: (139878135352)>

Tiempo en grupo monto sobre / bajo 60.000 CLP por tipo de documento

Tiempo en grupo monto sobre / bajo 60.000 CLP por Fondo fijo / Anticipo / Reembolso

Tiempo en grupo monto sobre / bajo 60.000 CLP por Sucursal

Análisis en conjunto de registros con monto atípico / no atípico

In [918]:
# Add an amount-outlier flag and label to df.
# A record is an outlier when its amount ('Cantidad ') is greater than
# mean + 3 * std of the amounts in its own transaction month.

# Group once and reuse the grouped object for both statistics.
monthly_amount = df.groupby(['Fecha_de_la_transacción_year_month'])['Cantidad ']
outliers_threshold = (
    (monthly_amount.mean() + 3 * monthly_amount.std())
    .rename('outliers_threshold')
    .reset_index()
)

# Drop a threshold column left by a previous run of this cell, so re-running
# does not produce 'outliers_threshold_x' / 'outliers_threshold_y' columns.
df = pd.merge(
    df.drop(columns=['outliers_threshold'], errors='ignore'),
    outliers_threshold,
    how='left',
    on=['Fecha_de_la_transacción_year_month'],
)

df['outlier'] = df['Cantidad '] > df['outliers_threshold']

# Single .loc assignment instead of chained indexing, which may write to a
# temporary copy and raises SettingWithCopyWarning.
df['outlier_label'] = 'no outlier'
df.loc[df['outlier'], 'outlier_label'] = 'outlier'
C:\Users\mwils\AppData\Local\Temp\ipykernel_26872\1116461469.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Monto

Monto en grupo monto atípico / no atípico

In [919]:
# Amount statistics per month for the outlier and non-outlier groups:
# describe() plus kurtosis and total amount.
# The grouped object is built once and reused for every statistic.
amount_by_group = df.groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])['Cantidad ']
stats = amount_by_group.describe()
# Each group is a Series, so the Series kurtosis is applied; the result is a
# Series aligned on the group index.
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() takes no column argument; the original positional 'Cantidad ' was
# interpreted as the numeric_only flag.
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats
Out[919]:
Fecha_de_la_transacción_year_month outlier_label count mean std min 25% 50% 75% max kurt sum
0 2022-1 no outlier 1536.000000000 26471.211601562 43069.606540885 0.000000000 4705.000000000 13300.000000000 30404.250000000 405590.000000000 24.122147455 40659781.020000003
1 2022-1 outlier 20.000000000 916522.825000000 534145.878713052 430000.000000000 526700.000000000 674475.000000000 1122565.500000000 2176000.000000000 0.098491503 18330456.500000000
2 2022-2 no outlier 1444.000000000 26134.978552632 39306.826593109 0.000000000 5000.000000000 13200.000000000 30445.000000000 300000.000000000 18.732447239 37738909.030000001
3 2022-2 outlier 12.000000000 807491.500000000 607607.910171668 338300.000000000 397012.500000000 634504.000000000 955421.250000000 2490722.000000000 5.483014428 9689898.000000000
In [920]:
# Total amount per month, one facet per outlier / non-outlier group.
# x is mapped to the exact column name (no trailing space) so it does not
# depend on plotnine's expression-evaluation fallback.
(
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap('outlier_label', ncol=2)
    + theme_bw()
    + guides(size=False)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[920]:
<ggplot: (188940869065)>
In [921]:
# Record volume and total amount per month for the outlier / non-outlier groups.
(
    # 'count' is referenced by its exact column name (the original had a trailing space
    # and only resolved through plotnine's expression fallback).
    ggplot(stats, aes(x='count', y='Fecha_de_la_transacción_year_month', color='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('outlier_label', ncol=2)
    + theme_bw()
    + coord_flip()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[921]:
<ggplot: (188938815528)>
In [922]:
# Box plot of the amount per month, coloured by outlier / non-outlier group.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='Cantidad ',
            color='outlier_label',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
)
Out[922]:
<ggplot: (188935603285)>
In [923]:
# Zoom into the non-outlier group: amount box plot per month.
(
    ggplot(
        df[df['outlier'] == False],
        aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
)
Out[923]:
<ggplot: (188938429437)>

Monto en grupo de monto atípico / no atípico por Tipo de documento

In [924]:
# Amount statistics per month, outlier / non-outlier group and document type.
group_keys = ['Fecha_de_la_transacción_year_month', 'outlier_label', 'Tipo Documento']
# Build the grouped amount column once and reuse it for every statistic.
monto_grouped = df.groupby(group_keys)['Cantidad ']
stats = monto_grouped.describe()
# Each group is a Series, so the Series kurtosis is the matching method
# (pd.DataFrame.kurt only worked by accident on older pandas versions).
stats['kurt'] = monto_grouped.apply(pd.Series.kurt)
# sum() takes no column name; the old positional 'Cantidad ' was being read as numeric_only.
stats['sum'] = monto_grouped.sum()
stats = stats.reset_index()
stats
Out[924]:
Fecha_de_la_transacción_year_month outlier_label Tipo Documento count mean std min 25% 50% 75% max kurt sum
0 2022-1 no outlier CHL - Documento Exento 1361.000000000 24820.552549596 39878.100392706 0.000000000 4950.000000000 12950.000000000 30000.000000000 405590.000000000 29.157130110 33780772.020000003
1 2022-1 no outlier CHL - Factura Afecta 175.000000000 39308.622857143 61218.730443279 700.000000000 2020.000000000 20000.000000000 45568.000000000 357814.000000000 9.123795484 6879009.000000000
2 2022-1 outlier CHL - Documento Exento 10.000000000 936689.150000000 613282.278861865 430000.000000000 492943.000000000 600950.000000000 1373804.500000000 2176000.000000000 0.087396082 9366891.500000000
3 2022-1 outlier CHL - Factura Afecta 10.000000000 896356.500000000 474664.159614458 500000.000000000 602878.750000000 740725.000000000 932662.500000000 1766400.000000000 0.611803850 8963565.000000000
4 2022-2 no outlier CHL - Documento Exento 1332.000000000 23343.070255255 33300.596306346 0.000000000 4900.000000000 12000.000000000 29975.000000000 261863.000000000 19.136446232 31092969.579999998
5 2022-2 no outlier CHL - Factura Afecta 112.000000000 59338.745089286 74714.257844758 43.450000000 17404.250000000 30649.500000000 61427.250000000 300000.000000000 4.014234385 6645939.450000000
6 2022-2 outlier CHL - Documento Exento 7.000000000 785286.142857143 776095.398478054 338300.000000000 363511.500000000 403350.000000000 768804.000000000 2490722.000000000 5.642201215 5497003.000000000
7 2022-2 outlier CHL - Factura Afecta 5.000000000 838579.000000000 331222.351231012 452200.000000000 539000.000000000 904995.000000000 1106700.000000000 1190000.000000000 -2.726125246 4192895.000000000
In [925]:
# Bar plot of the total amount per month, one panel per document type and outlier group.
(
    # x names the real column; the stray trailing space in the original only
    # resolved because plotnine fell back to evaluating it as an expression.
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('monto')
    + facet_wrap(['Tipo Documento', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    # stat='sum' maps a count to size; hide that legend.
    + guides(size=False)
    + theme(panel_spacing_x=1)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[925]:
<ggplot: (188936331532)>
In [926]:
# Record volume and total amount per month, by document type and outlier group.
(
    # 'count' is referenced by its exact column name (the original had a trailing space
    # and only resolved through plotnine's expression fallback).
    ggplot(stats, aes(x='count', y='Fecha_de_la_transacción_year_month', color='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap(['Tipo Documento', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(panel_spacing_y=0.5, panel_spacing_x=0.5)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[926]:
<ggplot: (188948857145)>
In [927]:
# Box plot of the amount per month, coloured by outlier group, one panel per document type.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='Cantidad ',
            color='outlier_label',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Tipo Documento', ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[927]:
<ggplot: (188944217113)>
In [928]:
# Zoom into the non-outlier group: amount box plot per month, coloured by document type.
(
    ggplot(
        df[df['outlier'] == False],
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='Cantidad ',
            color='Tipo Documento',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
)
Out[928]:
<ggplot: (188943828473)>

Monto en grupo de monto atípico / no atípico por Fondo fijo / Anticipo / Reembolso

In [929]:
# Amount statistics per month, outlier / non-outlier group and 'Fondo fijo' value.
group_keys = ['Fecha_de_la_transacción_year_month', 'outlier_label', 'Fondo fijo']
# Build the grouped amount column once and reuse it for every statistic.
monto_grouped = df.groupby(group_keys)['Cantidad ']
stats = monto_grouped.describe()
# Each group is a Series, so the Series kurtosis is the matching method
# (pd.DataFrame.kurt only worked by accident on older pandas versions).
stats['kurt'] = monto_grouped.apply(pd.Series.kurt)
# sum() takes no column name; the old positional 'Cantidad ' was being read as numeric_only.
stats['sum'] = monto_grouped.sum()
stats = stats.reset_index()

stats
Out[929]:
Fecha_de_la_transacción_year_month outlier_label Fondo fijo count mean std min 25% 50% 75% max kurt sum
0 2022-1 no outlier ANTICIPO (VxR) 40.000000000 75367.100000000 76952.080627575 936.000000000 17025.000000000 48500.000000000 94855.500000000 309600.000000000 2.098477259 3014684.000000000
1 2022-1 no outlier No aplica 10.000000000 51582.500000000 101945.839303960 500.000000000 4600.000000000 8285.000000000 28878.750000000 326800.000000000 7.337277214 515825.000000000
2 2022-1 no outlier REEMBOLSO 401.000000000 22754.711122195 37136.256018548 500.000000000 4270.000000000 10835.000000000 25990.000000000 326652.000000000 28.414926755 9124639.160000000
3 2022-1 no outlier Si 1085.000000000 25810.721529954 41460.207927473 0.000000000 4760.000000000 14000.000000000 30417.000000000 405590.000000000 27.872578219 28004632.859999999
4 2022-1 outlier ANTICIPO (VxR) 17.000000000 922296.705882353 538442.379216635 430000.000000000 535600.000000000 731850.000000000 996946.000000000 2176000.000000000 0.416853361 15679044.000000000
5 2022-1 outlier REEMBOLSO 2.000000000 1024148.750000000 812553.726377601 449586.500000000 736867.625000000 1024148.750000000 1311429.875000000 1598711.000000000 NaN 2048297.500000000
6 2022-1 outlier Si 1.000000000 603115.000000000 NaN 603115.000000000 603115.000000000 603115.000000000 603115.000000000 603115.000000000 NaN 603115.000000000
7 2022-2 no outlier ANTICIPO (VxR) 42.000000000 62816.238095238 84525.341923637 1100.000000000 10000.000000000 19781.500000000 92695.000000000 300000.000000000 1.073895928 2638282.000000000
8 2022-2 no outlier No aplica 9.000000000 54984.222222222 79673.236840827 4900.000000000 17489.000000000 30000.000000000 49582.000000000 261863.000000000 7.702282351 494858.000000000
9 2022-2 no outlier REEMBOLSO 382.000000000 20517.504136126 31502.584787731 0.000000000 3607.500000000 9195.000000000 24630.000000000 298737.000000000 28.765970413 7837686.580000000
10 2022-2 no outlier Si 1011.000000000 26476.837240356 37647.149728328 43.450000000 5200.000000000 14400.000000000 31900.000000000 300000.000000000 19.315008368 26768082.449999999
11 2022-2 outlier ANTICIPO (VxR) 7.000000000 818643.285714286 273495.373322667 452200.000000000 634504.000000000 807600.000000000 1005847.500000000 1190000.000000000 -1.215672903 5730503.000000000
12 2022-2 outlier REEMBOLSO 2.000000000 1419872.500000000 1514409.886160448 349023.000000000 884447.750000000 1419872.500000000 1955297.250000000 2490722.000000000 NaN 2839745.000000000
13 2022-2 outlier Si 3.000000000 373216.666666667 32787.739070167 338300.000000000 358150.000000000 378000.000000000 390675.000000000 403350.000000000 NaN 1119650.000000000
In [930]:
# Bar plot of the total amount per month, one panel per 'Fondo fijo' value and outlier group.
(
    # x names the real column; the stray trailing space in the original only
    # resolved because plotnine fell back to evaluating it as an expression.
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('monto')
    + facet_wrap(['Fondo fijo', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    # stat='sum' maps a count to size; hide that legend.
    + guides(size=False)
    + theme(panel_spacing_x=1, panel_spacing_y=0.2, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[930]:
<ggplot: (188944331239)>
In [931]:
# Record volume and total amount per month, by 'Fondo fijo' value and outlier group.
(
    # 'count' is referenced by its exact column name (the original had a trailing space
    # and only resolved through plotnine's expression fallback).
    ggplot(stats, aes(x='count', y='Fecha_de_la_transacción_year_month', color='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap(['Fondo fijo', 'outlier_label'], ncol=2)
    + theme_bw()
    + coord_flip()
    + theme(panel_spacing_y=0.4, panel_spacing_x=0.5, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[931]:
<ggplot: (188944158693)>
In [932]:
# Box plot of the amount per month, coloured by outlier group, one panel per 'Fondo fijo' value.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='Cantidad ',
            color='outlier_label',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Fondo fijo', ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[932]:
<ggplot: (188940772145)>
In [933]:
# Zoom into the non-outlier group: amount box plot per month, coloured by 'Fondo fijo'.
(
    ggplot(
        df[df['outlier'] == False],
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='Cantidad ',
            color='Fondo fijo',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
)
Out[933]:
<ggplot: (188938132201)>
In [934]:
# Zoom into the outlier group: amount box plot per month, coloured by 'Fondo fijo'.
(
    ggplot(
        df[df['outlier'] == True],
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='Cantidad ',
            color='Fondo fijo',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
)
Out[934]:
<ggplot: (188944977374)>

Monto en grupo de monto atípico / no atípico por Sucursal

In [935]:
# Amount statistics per month, outlier / non-outlier group and branch ('Sucursal ');
# only the first 5 rows are displayed.

group_keys = ['Fecha_de_la_transacción_year_month', 'outlier_label', 'Sucursal ']
# Build the grouped amount column once and reuse it for every statistic.
monto_grouped = df.groupby(group_keys)['Cantidad ']
stats = monto_grouped.describe()
# Each group is a Series, so the Series kurtosis is the matching method
# (pd.DataFrame.kurt only worked by accident on older pandas versions).
stats['kurt'] = monto_grouped.apply(pd.Series.kurt)
# sum() takes no column name; the old positional 'Cantidad ' was being read as numeric_only.
stats['sum'] = monto_grouped.sum()
stats = stats.reset_index()

stats.head(5)
Out[935]:
Fecha_de_la_transacción_year_month outlier_label Sucursal count mean std min 25% 50% 75% max kurt sum
0 2022-1 no outlier GESTION INTEGRAL DE RESIDUOS SPA 29.000000000 14757.379310345 16716.822441169 730.000000000 2200.000000000 7000.000000000 20990.000000000 59080.000000000 0.568508935 427964.000000000
1 2022-1 no outlier PROCESOS SANTARIOS SPA 284.000000000 35686.430633803 58982.947434996 700.000000000 5995.000000000 14400.000000000 40040.000000000 405590.000000000 15.287269935 10134946.300000001
2 2022-1 no outlier VEOLIA ENERGIA CHILE 132.000000000 11218.703484848 14702.830954940 11.550000000 1997.500000000 4500.000000000 15948.500000000 75000.000000000 3.944176011 1480868.860000000
3 2022-1 no outlier VEOLIA HOLDING CHILE S.A. 91.000000000 32357.152307692 59543.712096510 500.000000000 2200.000000000 10580.000000000 35503.580000000 326418.000000000 10.240855982 2944500.860000000
4 2022-1 no outlier VEOLIA RESIDUOS CHILE S.A. 16.000000000 39382.875000000 26461.022257590 7000.000000000 20448.750000000 35858.000000000 49437.750000000 83311.000000000 -0.814407178 630126.000000000
In [936]:
# Bar plot of the total amount per month for the non-outlier group, one panel per branch.

(
    # x names the real column; the stray trailing space in the original only
    # resolved because plotnine fell back to evaluating it as an expression.
    ggplot(
        stats[stats['outlier_label'] == 'no outlier'],
        aes(y='sum', x='Fecha_de_la_transacción_year_month'),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto (CLP)')
    # 'Sucursal ' keeps its trailing space: that is the actual column name.
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    # stat='sum' maps a count to size; hide that legend.
    + guides(size=False)
    + theme(panel_spacing_x=1, panel_spacing_y=0.5, aspect_ratio=0.6)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[936]:
<ggplot: (188944968539)>
In [937]:
# Bar plot of the total amount per month for the outlier group, one panel per branch.

(
    # x names the real column; the stray trailing space in the original only
    # resolved because plotnine fell back to evaluating it as an expression.
    ggplot(
        stats[stats['outlier_label'] != 'no outlier'],
        aes(y='sum', x='Fecha_de_la_transacción_year_month'),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto (CLP)')
    # 'Sucursal ' keeps its trailing space: that is the actual column name.
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    # stat='sum' maps a count to size; hide that legend.
    + guides(size=False)
    + theme(panel_spacing_x=1, panel_spacing_y=0.5, aspect_ratio=0.6)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[937]:
<ggplot: (188933667387)>
In [938]:
# Record volume and total amount per branch, one panel per month and outlier group.

(
    # 'count' is referenced by its exact column name (the original had a trailing space
    # and only resolved through plotnine's expression fallback). 'Sucursal ' keeps its
    # trailing space because that is the actual column name.
    ggplot(stats, aes(x='count', y='Sucursal ', color='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Sucursal')
    + facet_wrap(['Fecha_de_la_transacción_year_month', 'outlier_label'], ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[938]:
<ggplot: (188928731359)>
In [939]:
# Box plot of the amount per branch, coloured by outlier group, one panel per month.

(
    ggplot(df, aes(x='Sucursal ', y='Cantidad ', color='outlier_label'))
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Monto (CLP)')
    # Facet on the exact column name; the original string carried a stray trailing space.
    + facet_wrap('Fecha_de_la_transacción_year_month', ncol=2)
    + coord_flip()
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=.5,
        aspect_ratio=2,
        axis_text_x=element_text(rotation=75, hjust=1),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[939]:
<ggplot: (188948789882)>
In [940]:
# Box plot of the amount per branch; same data as the previous plot but with the
# roles swapped: colour = month, panels = outlier group.
(
    ggplot(
        df,
        aes(
            x='Sucursal ',
            y='Cantidad ',
            color='Fecha_de_la_transacción_year_month',
        ),
    )
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Monto (CLP)')
    + facet_wrap('outlier_label', ncol=2, scales='free')
    + coord_flip()
    + theme_bw()
    + theme(
        panel_spacing_x=3,
        panel_spacing_y=.5,
        aspect_ratio=4,
        axis_text_x=element_text(rotation=75, hjust=1),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
Out[940]:
<ggplot: (188942673659)>
In [941]:
# Zoom into the non-outlier group: amount box plot per branch, coloured by month.
(
    ggplot(
        df[df['outlier'] == False],
        aes(
            x='Sucursal ',
            y='Cantidad ',
            color='Fecha_de_la_transacción_year_month',
        ),
    )
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
    + theme(aspect_ratio=1.2)
)
Out[941]:
<ggplot: (188942691626)>
In [942]:
# Zoom into the outlier group: amount box plot per branch, coloured by month.
(
    ggplot(
        df[df['outlier'] == True],
        aes(
            x='Sucursal ',
            y='Cantidad ',
            color='Fecha_de_la_transacción_year_month',
        ),
    )
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
    + theme(aspect_ratio=1.2)
)
Out[942]:
<ggplot: (188933660594)>

Monto en grupo de monto atípico / no atípico por Categoría

In [943]:
# Amount statistics per category, month and outlier / non-outlier group;
# only the first 5 rows are displayed.
group_keys = ['Categoría ', 'Fecha_de_la_transacción_year_month', 'outlier_label']
# Build the grouped amount column once and reuse it for every statistic.
monto_grouped = df.groupby(group_keys)['Cantidad ']
stats = monto_grouped.describe()
# Each group is a Series, so the Series kurtosis is the matching method
# (pd.DataFrame.kurt only worked by accident on older pandas versions).
stats['kurt'] = monto_grouped.apply(pd.Series.kurt)
# sum() takes no column name; the old positional 'Cantidad ' was being read as numeric_only.
stats['sum'] = monto_grouped.sum()
stats = stats.reset_index()
stats.head(5)
Out[943]:
Categoría Fecha_de_la_transacción_year_month outlier_label count mean std min 25% 50% 75% max kurt sum
0 Aceite Hidraulico 2022-1 no outlier 4.000000000 32492.500000000 13071.726167573 13000.000000000 30992.500000000 38490.000000000 39990.000000000 39990.000000000 3.750498016 129970.000000000
1 Aceite Hidraulico 2022-2 no outlier 4.000000000 40337.500000000 59449.378115615 5400.000000000 6585.000000000 13490.000000000 47242.500000000 128970.000000000 3.742463372 161350.000000000
2 Aceite Motor 2022-1 no outlier 8.000000000 12375.000000000 7293.588182193 1500.000000000 10875.000000000 13000.000000000 14000.000000000 26000.000000000 1.437663240 99000.000000000
3 Aceite Motor 2022-2 no outlier 3.000000000 21233.333333333 12568.346483660 13000.000000000 14000.000000000 15000.000000000 25350.000000000 35700.000000000 NaN 63700.000000000
4 Alojamientos 2022-1 no outlier 3.000000000 94333.333333333 51549.329125929 39000.000000000 71000.000000000 103000.000000000 122000.000000000 141000.000000000 NaN 283000.000000000
In [944]:
# Categories with the largest total amount per month and outlier group
# (up to 10 rows kept per group; the first 5 rows are displayed).
stats_top = (
    stats
    .groupby('Fecha_de_la_transacción_year_month')
    # Within each month, order the rows by total amount, largest first.
    .apply(lambda month_rows: month_rows.sort_values(['sum'], ascending=False))
    # Replace the index produced by groupby/apply with a plain running counter.
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
stats_top.head(5)
Out[944]:
Categoría Fecha_de_la_transacción_year_month outlier_label count mean std min 25% 50% 75% max kurt sum
Index
0 Manutencion - Colacion 2022-1 no outlier 260.000000000 26383.076038462 51245.734643259 886.600000000 5075.000000000 10545.000000000 20462.250000000 405590.000000000 21.749169761 6859599.770000000
1 Peajes 2022-1 no outlier 197.000000000 26694.974619289 40039.965832933 200.000000000 1600.000000000 14220.000000000 38600.000000000 286330.000000000 19.876583769 5258910.000000000
2 Peajes 2022-1 outlier 4.000000000 1235600.000000000 865701.164759911 500000.000000000 500000.000000000 1133200.000000000 1868800.000000000 2176000.000000000 -4.912002825 4942400.000000000
3 Impto veh. - P.circulacion - Rev. Tecnica - SOAP 2022-1 no outlier 82.000000000 46858.512195122 68753.314855258 936.000000000 11451.250000000 18445.000000000 40219.250000000 326800.000000000 6.272290556 3842398.000000000
4 Pasajes Aereos y terrestres al int (en Chile) 2022-1 outlier 3.000000000 1230553.500000000 687183.973245702 449586.500000000 974505.250000000 1499424.000000000 1621037.000000000 1742650.000000000 NaN 3691660.500000000
In [945]:
# Top categories by total amount per month and outlier group.
# One label per row: "<category> - <month> - <outlier label>".
categoria_labels = (
    stats_top['Categoría ']
    + ' - '
    + stats_top['Fecha_de_la_transacción_year_month']
    + ' - '
    + stats_top['outlier_label']
)
# Category order is the reversed row order of stats_top.
categoria_list = categoria_labels.tolist()[::-1]
categoria_cat = pd.Categorical(categoria_labels, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)


(
    ggplot(stats_top_aux, aes(y='sum', x='categoria_cat'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap(
        ['Fecha_de_la_transacción_year_month', 'outlier_label'],
        scales='free',
        ncol=1,
    )
    + coord_flip()
    + theme_bw()
    + theme(legend_position='none')
    + theme(panel_spacing_y=1, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[945]:
<ggplot: (188940947787)>
In [946]:
# Find the categories that appear in the monthly top 10 in every month,
# separately for the outlier and non-outlier groups.
stats_top = (
    stats
    .groupby('Fecha_de_la_transacción_year_month')
    .apply(lambda x: x.sort_values(['sum'], ascending=False))
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique())

# Number of months in which each (category, outlier label) pair made the top 10.
# (The original computed this identical statement twice.)
categoria_count = (
    stats_top
    .groupby(['Categoría ', 'outlier_label'])['Categoría ']
    .count()
    .sort_values(ascending=False)
)

# Keep only the (category, outlier label) pairs present in every month.
categoria_count = categoria_count[categoria_count == n_month].index.to_list()

# Work on a new frame instead of an alias of `df`: the original `df_aux = df`
# silently added the helper column to `df` itself.
df_aux = df.assign(
    list_of_tuples=list(zip(df['Categoría '].to_list(), df['outlier_label'].to_list()))
)

df_top_categorias = df_aux[df_aux['list_of_tuples'].isin(categoria_count)]
In [947]:
# Recurring top categories (present every month) in the non-outlier group: total amount.

(
    ggplot(
        df_top_categorias[df_top_categorias['outlier_label'] == 'no outlier'],
        aes(y='Cantidad ', x='Categoría '),
    )
    # The data are raw transactions, so the bars must stack every row.
    # stat='sum' collapses rows that share the same (x, y) pair into one, which
    # counted repeated amounts only once; stat='identity' keeps them all.
    + geom_bar(stat='identity', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap(['outlier_label', 'Fecha_de_la_transacción_year_month'], ncol=1)
    + coord_flip()
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[947]:
<ggplot: (188942673713)>
In [948]:
# Recurring top categories (present every month) in the outlier group: total amount.

(
    ggplot(
        df_top_categorias[df_top_categorias['outlier_label'] != 'no outlier'],
        aes(y='Cantidad ', x='Categoría '),
    )
    # The data are raw transactions, so the bars must stack every row.
    # stat='sum' collapses rows that share the same (x, y) pair into one, which
    # counted repeated amounts only once; stat='identity' keeps them all.
    + geom_bar(stat='identity', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap(['outlier_label', 'Fecha_de_la_transacción_year_month'], ncol=1)
    + coord_flip()
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[948]:
<ggplot: (188980256513)>

Monto en grupo de monto atípico / no atípico por Proveedor

In [949]:
# Amount statistics per supplier, month and outlier / non-outlier group;
# only the first 5 rows are displayed.
group_keys = ['Proveedor ', 'Fecha_de_la_transacción_year_month', 'outlier_label']
# Build the grouped amount column once and reuse it for every statistic.
monto_grouped = df.groupby(group_keys)['Cantidad ']
stats = monto_grouped.describe()
# Each group is a Series, so the Series kurtosis is the matching method
# (pd.DataFrame.kurt only worked by accident on older pandas versions).
stats['kurt'] = monto_grouped.apply(pd.Series.kurt)
# sum() takes no column name; the old positional 'Cantidad ' was being read as numeric_only.
stats['sum'] = monto_grouped.sum()
stats = stats.reset_index()
stats.head(5)
Out[949]:
Proveedor Fecha_de_la_transacción_year_month outlier_label count mean std min 25% 50% 75% max kurt sum
0 0 2022-1 no outlier 4.000000000 19945.000000000 15184.458062989 0.000000000 13350.000000000 22015.000000000 28610.000000000 35750.000000000 0.434133850 79780.000000000
1 12636468-7 2022-2 no outlier 1.000000000 9000.000000000 NaN 9000.000000000 9000.000000000 9000.000000000 9000.000000000 9000.000000000 NaN 9000.000000000
2 14324088-6 2022-2 no outlier 1.000000000 13200.000000000 NaN 13200.000000000 13200.000000000 13200.000000000 13200.000000000 13200.000000000 NaN 13200.000000000
3 24100 2022-1 no outlier 1.000000000 24100.000000000 NaN 24100.000000000 24100.000000000 24100.000000000 24100.000000000 24100.000000000 NaN 24100.000000000
4 2DO JUZGADO POLICIA LOCAL ( ABONAR A CLAUDIO O... 2022-1 no outlier 1.000000000 163326.000000000 NaN 163326.000000000 163326.000000000 163326.000000000 163326.000000000 163326.000000000 NaN 163326.000000000
In [950]:
# Suppliers with the largest total amount per month and outlier group
# (up to 10 rows kept per group; the first 5 rows are displayed).
stats_top = (
    stats
    .groupby('Fecha_de_la_transacción_year_month')
    # Within each month, order the rows by total amount, largest first.
    .apply(lambda month_rows: month_rows.sort_values(['sum'], ascending=False))
    # Replace the index produced by groupby/apply with a plain running counter.
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
stats_top.head(5)
Out[950]:
Proveedor Fecha_de_la_transacción_year_month outlier_label count mean std min 25% 50% 75% max kurt sum
Index
0 RUTA DE LA ARAUCANIA SOCIEDAD CONCESIONARIA S.A 2022-1 outlier 4.000000000 842300.000000000 617969.697207449 500000.000000000 500000.000000000 551400.000000000 893700.000000000 1766400.000000000 3.870873076 3369200.000000000
1 Ruta de la Araucanía 2022-1 outlier 2.000000000 1355800.000000000 1159937.963858413 535600.000000000 945700.000000000 1355800.000000000 1765900.000000000 2176000.000000000 NaN 2711600.000000000
2 BYF CORREDORES DE SEGUROS LTDA 2022-1 no outlier 20.000000000 111590.000000000 92384.772611758 16500.000000000 42650.000000000 86000.000000000 144300.000000000 326800.000000000 0.703073558 2231800.000000000
3 PEAJES 2022-1 no outlier 14.000000000 127977.142857143 97321.271643386 8500.000000000 33175.000000000 131870.000000000 191932.500000000 286330.000000000 -1.156886909 1791680.000000000
4 soc. y turismo cascada de las Animas 2022-1 outlier 1.000000000 1742650.000000000 NaN 1742650.000000000 1742650.000000000 1742650.000000000 1742650.000000000 1742650.000000000 NaN 1742650.000000000
In [951]:
# Top suppliers by total amount per month and outlier group.
# One label per row: "<supplier> - <month> - <outlier label>".
proveedor_labels = (
    stats_top['Proveedor ']
    + ' - '
    + stats_top['Fecha_de_la_transacción_year_month']
    + ' - '
    + stats_top['outlier_label']
)
# Category order is the reversed row order of stats_top.
categoria_list = proveedor_labels.tolist()[::-1]
categoria_cat = pd.Categorical(proveedor_labels, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)


(
    ggplot(stats_top_aux, aes(y='sum', x='categoria_cat'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap(
        ['Fecha_de_la_transacción_year_month', 'outlier_label'],
        scales='free',
        ncol=1,
    )
    + coord_flip()
    + theme_bw()
    + theme(legend_position='none')
    + theme(panel_spacing_y=1, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[951]:
<ggplot: (188944323104)>
In [952]:
# Find the suppliers that appear in the monthly top 10 in every month,
# separately for the outlier and non-outlier groups.
stats_top = (
    stats
    .groupby('Fecha_de_la_transacción_year_month')
    .apply(lambda x: x.sort_values(['sum'], ascending=False))
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique())

# Number of months in which each (supplier, outlier label) pair made the top 10.
# (The original computed this identical statement twice.)
categoria_count = (
    stats_top
    .groupby(['Proveedor ', 'outlier_label'])['Proveedor ']
    .count()
    .sort_values(ascending=False)
)

# Keep only the (supplier, outlier label) pairs present in every month.
categoria_count = categoria_count[categoria_count == n_month].index.to_list()

# Work on a new frame instead of an alias of `df`: the original `df_aux = df`
# silently added the helper column to `df` itself.
df_aux = df.assign(
    list_of_tuples=list(zip(df['Proveedor '].to_list(), df['outlier_label'].to_list()))
)

df_top_categorias = df_aux[df_aux['list_of_tuples'].isin(categoria_count)]
In [953]:
# Recurring top suppliers (present every month) in the non-outlier group: total amount.

(
    ggplot(
        df_top_categorias[df_top_categorias['outlier_label'] == 'no outlier'],
        aes(y='Cantidad ', x='Proveedor '),
    )
    # The data are raw transactions, so the bars must stack every row.
    # stat='sum' collapses rows that share the same (x, y) pair into one, which
    # counted repeated amounts only once; stat='identity' keeps them all.
    + geom_bar(stat='identity', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap(['outlier_label', 'Fecha_de_la_transacción_year_month'], ncol=1)
    + coord_flip()
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[953]:
<ggplot: (188943625862)>
In [954]:
# Top providers that repeat every month, outlier amount group.
# NOTE(review): stat='sum' groups identical (x, y) pairs before drawing; confirm
# that repeated amounts for one provider are not collapsed into one bar segment.
(
    ggplot(
        df_top_categorias.loc[df_top_categorias['outlier_label'] != 'no outlier'],
        aes(x='Proveedor ', y='Cantidad '),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap(['outlier_label', 'Fecha_de_la_transacción_year_month'], ncol=1)
    + coord_flip()
    + theme_bw()
    + guides(size=False)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[954]:
<ggplot: (188940948123)>

Tiempo

Tiempo total en grupo monto atipico / no atipico

In [955]:
# Descriptive statistics of the approval-minus-transaction time delta,
# per month and per outlier / non-outlier amount group.
stats = (
    df.groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    ['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats
Out[955]:
Fecha_de_la_transacción_year_month outlier_label count mean std min 25% 50% 75% max
0 2022-1 no outlier 1535 18 days 14:45:18.600895113 24 days 06:07:30.089167873 0 days 12:50:17.203000 3 days 22:16:23.328500 9 days 14:26:55.083000 22 days 13:58:07.938500 228 days 21:13:48.960000
1 2022-1 outlier 20 39 days 10:29:04.452300 49 days 04:05:52.966655539 0 days 19:58:53.307000 4 days 12:57:21.412750 21 days 16:14:05.965000 49 days 21:22:57.055000 184 days 02:40:04.157000
2 2022-2 no outlier 1443 19 days 07:33:21.484621621 28 days 08:36:53.664947149 0 days 14:02:14.193000 3 days 14:56:39.558500 7 days 16:11:19.140000 21 days 13:23:24.600000 203 days 15:40:56.447000
3 2022-2 outlier 12 17 days 10:26:07.342083333 29 days 21:01:01.697772761 0 days 21:29:30.367000 1 days 09:45:56.209500 2 days 13:43:37.801500 15 days 17:23:47.647750 83 days 17:37:37.997000
In [956]:
# Bar plot of the median approval delay (whole days) per month, one panel per
# outlier / non-outlier amount group.
stats = (
    df.groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    ['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)

# '50%' is the median column produced by describe(); 'count' is the group size.
stats = stats.rename(columns={'50%': 'Dias', 'count': 'N° Registros'})

# The x aesthetic previously carried a stray trailing space
# ('..._year_month '), which matches no column of `stats`; use the exact name.
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='Dias'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Tiempo (dias)')
    + facet_wrap('outlier_label', ncol=2)
    + theme_bw()
    + guides(size=False)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[956]:
<ggplot: (188933928702)>
In [957]:
# Record volume versus median approval delay per month, for the outlier and
# non-outlier amount groups (point size = record count, colour = median days).
(
    ggplot(
        stats,
        aes(x='N° Registros', y='Fecha_de_la_transacción_year_month', color='Dias'),
    )
    + geom_point(aes(size='N° Registros'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('outlier_label', ncol=2)
    + theme_bw()
    + coord_flip()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[957]:
<ggplot: (188934253088)>
In [958]:
# Box plot of the approval delay (days) per month, coloured by
# outlier / non-outlier amount group.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='outlier_label',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + coord_flip()
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
Out[958]:
<ggplot: (188922080833)>

Tiempo en grupo monto atípico / no atípico por tipo de documento

In [959]:
# Descriptive statistics of the approval delay per month, amount group
# (outlier / non-outlier) and document type.
stats = (
    df.groupby(['Fecha_de_la_transacción_year_month', 'outlier_label', 'Tipo Documento'])
    ['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats
Out[959]:
Fecha_de_la_transacción_year_month outlier_label Tipo Documento count mean std min 25% 50% 75% max
0 2022-1 no outlier CHL - Documento Exento 1360 18 days 21:12:07.643848529 24 days 03:30:24.238639703 0 days 12:50:17.203000 3 days 21:38:23.359250 9 days 15:20:22.593000 22 days 20:58:25.916750 208 days 15:40:57.493000
1 2022-1 no outlier CHL - Factura Afecta 175 16 days 12:39:11.181371428 25 days 01:26:01.807138667 0 days 14:57:56.833000 4 days 13:19:33.862000 6 days 22:03:23.927000 18 days 05:42:46.096500 228 days 21:13:48.960000
2 2022-1 outlier CHL - Documento Exento 10 39 days 08:50:46.774700 62 days 00:33:02.268031093 0 days 19:58:53.307000 3 days 16:37:49.032250 4 days 07:46:57.368500 68 days 07:18:13.114250 184 days 02:40:04.157000
3 2022-1 outlier CHL - Factura Afecta 10 39 days 12:07:22.129900 35 days 11:00:20.469659058 5 days 12:34:56.157000 21 days 15:49:31.206000 30 days 03:38:58.106500 36 days 15:00:08.862500 103 days 17:37:37.903000
4 2022-2 no outlier CHL - Documento Exento 1331 19 days 18:59:05.559374154 29 days 02:31:48.883249929 0 days 14:02:14.193000 3 days 15:19:23.866500 7 days 18:13:58.177000 21 days 14:38:35.122000 203 days 15:40:56.447000
5 2022-2 no outlier CHL - Factura Afecta 112 13 days 15:44:08.239125 16 days 04:49:13.403850101 0 days 16:47:08.880000 3 days 14:19:27.212250 7 days 11:56:44.890000 17 days 21:29:53.300500 85 days 15:54:25.647000
6 2022-2 outlier CHL - Documento Exento 7 6 days 08:06:46.442571428 6 days 18:59:20.584001436 0 days 21:29:30.367000 1 days 14:22:32.282000 2 days 13:43:53.323000 10 days 13:34:54.697000 16 days 13:39:07.450000
7 2022-2 outlier CHL - Factura Afecta 5 32 days 23:17:12.601400 43 days 05:20:13.146994361 0 days 21:40:40.883000 0 days 21:46:43.857000 2 days 13:43:22.280000 76 days 17:37:37.990000 83 days 17:37:37.997000
In [960]:
# Bar plot of the median approval delay (whole days) per month, one panel per
# (document type, outlier / non-outlier amount group).
stats = (
    df.groupby(['Fecha_de_la_transacción_year_month', 'outlier_label', 'Tipo Documento'])
    ['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)

# '50%' is the median column produced by describe(); 'count' is the group size.
stats = stats.rename(columns={'50%': 'Dias', 'count': 'N° Registros'})

# The x aesthetic previously carried a stray trailing space
# ('..._year_month '), which matches no column of `stats`; use the exact name.
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='Dias'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Tiempo (dias)')
    + facet_wrap(['Tipo Documento', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(panel_spacing_x=1)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[960]:
<ggplot: (188934253094)>
In [ ]:
# Record volume versus median approval delay per month, split by document type
# and outlier / non-outlier amount group.
(
    ggplot(
        stats,
        aes(x='N° Registros', y='Fecha_de_la_transacción_year_month', color='Dias'),
    )
    + geom_point(aes(size='N° Registros'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap(['Tipo Documento', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(panel_spacing_y=0.5, panel_spacing_x=0.5)
)
In [961]:
# Box plot of the approval delay (days) per month, coloured by
# outlier / non-outlier amount group, one panel per document type.
# The y axis shows the delay in days, so it is labelled as time; the previous
# label 'Monto (CLP)' described an amount and did not match the plotted variable.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='outlier_label',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Tipo Documento', ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
Out[961]:
<ggplot: (188938977944)>

Tiempo en grupo monto atipico / no atipico por Fondo fijo / Anticipo / Reembolso

In [962]:
# Descriptive statistics of the approval delay per month, amount group
# (outlier / non-outlier) and 'Fondo fijo' value.
stats = (
    df.groupby(['Fecha_de_la_transacción_year_month', 'outlier_label', 'Fondo fijo'])
    ['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)

stats
Out[962]:
Fecha_de_la_transacción_year_month outlier_label Fondo fijo count mean std min 25% 50% 75% max
0 2022-1 no outlier ANTICIPO (VxR) 40 47 days 22:36:30.679100 44 days 07:42:45.186159767 2 days 21:28:29.130000 3 days 21:26:10.033500 39 days 17:53:19.668500 86 days 18:30:56.782500 112 days 18:31:03.363000
1 2022-1 no outlier No aplica 10 15 days 03:04:33.145400 10 days 02:33:36.277393134 3 days 21:26:09.297000 8 days 20:56:05.725250 14 days 02:56:05.743500 17 days 20:38:16.848000 39 days 14:14:10.410000
2 2022-1 no outlier REEMBOLSO 400 18 days 03:38:43.134352499 23 days 20:34:21.485607245 0 days 13:56:17.123000 4 days 21:52:34.728250 10 days 13:36:29.502000 21 days 14:58:23.653000 228 days 21:13:48.960000
3 2022-1 no outlier Si 1085 17 days 17:40:25.290336405 22 days 18:39:45.369740785 0 days 12:50:17.203000 3 days 17:48:35.467000 9 days 01:49:21.473000 22 days 14:07:33.910000 208 days 15:40:57.493000
4 2022-1 outlier ANTICIPO (VxR) 17 29 days 17:43:10.038411764 35 days 00:01:26.696718079 0 days 19:58:53.307000 3 days 21:26:09.280000 21 days 15:24:56.447000 33 days 16:59:30.680000 103 days 17:37:37.903000
5 2022-1 outlier REEMBOLSO 2 138 days 01:05:04.735000 65 days 03:31:50.582185716 91 days 23:30:05.313000 115 days 00:17:35.024000 138 days 01:05:04.735000 161 days 01:52:34.446000 184 days 02:40:04.157000
6 2022-1 outlier Si 1 7 days 02:17:28.923000 NaT 7 days 02:17:28.923000 7 days 02:17:28.923000 7 days 02:17:28.923000 7 days 02:17:28.923000 7 days 02:17:28.923000
7 2022-2 no outlier ANTICIPO (VxR) 41 49 days 00:58:11.847780488 28 days 12:20:11.070784460 0 days 16:21:41.257000 10 days 23:42:58.323000 61 days 18:31:03.363000 68 days 19:59:26.777000 78 days 18:30:56.837000
8 2022-2 no outlier No aplica 9 16 days 12:50:18.923666666 18 days 13:09:45.994352181 1 days 02:22:16.203000 3 days 13:47:43.693000 6 days 02:22:16.177000 28 days 17:07:06.090000 49 days 14:39:48.870000
9 2022-2 no outlier REEMBOLSO 382 20 days 13:56:54.576479057 27 days 15:55:26.582841934 0 days 14:02:14.193000 4 days 14:23:59.762000 9 days 20:08:38.930000 21 days 13:47:43.579750 161 days 22:22:53.340000
10 2022-2 no outlier Si 1011 17 days 15:44:04.350170128 28 days 00:33:42.677869793 0 days 14:22:45.003000 3 days 14:19:27.180000 6 days 21:11:25.850000 18 days 15:26:15.522000 203 days 15:40:56.447000
11 2022-2 outlier ANTICIPO (VxR) 7 24 days 17:31:34.953857142 38 days 00:05:34.076866379 0 days 21:40:40.883000 1 days 17:45:03.068500 2 days 13:43:53.323000 41 days 05:04:23.168500 83 days 17:37:37.997000
12 2022-2 outlier REEMBOLSO 2 1 days 14:22:32.282000 0 days 00:52:08.176760358 1 days 13:45:40.327000 1 days 14:04:06.304500 1 days 14:22:32.282000 1 days 14:40:58.259500 1 days 14:59:24.237000
13 2022-2 outlier Si 3 10 days 23:15:46.288000 8 days 17:48:53.652270656 0 days 21:29:30.367000 8 days 04:04:05.707000 15 days 10:38:41.047000 16 days 00:08:54.248500 16 days 13:39:07.450000
In [963]:
# Bar plot of the median approval delay (whole days) per month, one panel per
# ('Fondo fijo' value, outlier / non-outlier amount group).
stats = (
    df.groupby(['Fecha_de_la_transacción_year_month', 'outlier_label', 'Fondo fijo'])
    ['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)

# '50%' is the median column produced by describe(); 'count' is the group size.
stats = stats.rename(columns={'50%': 'Dias', 'count': 'N° Registros'})

# The x aesthetic previously carried a stray trailing space
# ('..._year_month '), which matches no column of `stats`; use the exact name.
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='Dias'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap(['Fondo fijo', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(panel_spacing_x=1, panel_spacing_y=0.2, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[963]:
<ggplot: (188939122704)>
In [964]:
# Record volume versus median approval delay per month, split by 'Fondo fijo'
# value and outlier / non-outlier amount group.

# The original chained two theme() calls; the second one overrode
# panel_spacing_y (0.4 -> 0.5), so the effective settings are written once here.
(
    ggplot(
        stats,
        aes(x='N° Registros', y='Fecha_de_la_transacción_year_month', color='Dias'),
    )
    + geom_point(aes(size='N° Registros'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap(['Fondo fijo', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(panel_spacing_y=0.5, panel_spacing_x=0.5, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[964]:
<ggplot: (188925262284)>
In [965]:
# Box plot of the approval delay (days) per month, coloured by
# outlier / non-outlier amount group, one panel per 'Fondo fijo' value.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='outlier_label',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Tiempo(dias)')
    + facet_wrap('Fondo fijo', ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
Out[965]:
<ggplot: (188943491181)>
In [966]:
# Zoom on the non-outlier amount group: approval delay per month,
# coloured by 'Fondo fijo' value.
(
    ggplot(
        df.loc[df['outlier'] == False],
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='Fondo fijo',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + coord_flip()
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
Out[966]:
<ggplot: (188941331472)>
In [967]:
# Zoom on the outlier amount group: approval delay per month,
# coloured by 'Fondo fijo' value.
(
    ggplot(
        df.loc[df['outlier'] == True],
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='Fondo fijo',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + coord_flip()
    + theme_bw()
)
Out[967]:
<ggplot: (188946369726)>

Tiempo en grupo monto atipico / no atipico por Sucursal

In [968]:
# Descriptive statistics of the approval delay per month, amount group
# (outlier / non-outlier) and branch ('Sucursal '); only the first 5 rows shown.

stats = (
    df.groupby(['Fecha_de_la_transacción_year_month', 'outlier_label', 'Sucursal '])
    ['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)

stats.head(5)
Out[968]:
Fecha_de_la_transacción_year_month outlier_label Sucursal count mean std min 25% 50% 75% max
0 2022-1 no outlier GESTION INTEGRAL DE RESIDUOS SPA 28 57 days 23:38:26.290428571 13 days 17:45:14.510641479 43 days 12:46:05.287000 50 days 18:54:36.242500 57 days 12:46:03.315000 59 days 19:45:46.077000 117 days 13:50:32.827000
1 2022-1 no outlier PROCESOS SANTARIOS SPA 284 13 days 20:41:27.787309859 16 days 03:41:14.548368038 0 days 12:50:17.203000 4 days 21:52:34.927500 8 days 22:09:28.185000 16 days 00:39:24.182250 114 days 15:23:51.650000
2 2022-1 no outlier VEOLIA ENERGIA CHILE 132 28 days 04:14:15.729045454 31 days 23:11:17.629796112 1 days 00:03:08.623000 5 days 16:30:34.804000 14 days 08:04:03.280000 37 days 11:52:28.393250 118 days 03:55:29.213000
3 2022-1 no outlier VEOLIA HOLDING CHILE S.A. 91 24 days 11:18:41.364175824 22 days 12:12:41.932613080 0 days 21:07:27.457000 10 days 13:36:29.502000 16 days 15:20:22.343000 27 days 15:20:22.998500 106 days 17:54:42.327000
4 2022-1 no outlier VEOLIA RESIDUOS CHILE S.A. 16 41 days 05:28:25.235062500 57 days 21:05:09.055380919 0 days 14:57:56.833000 8 days 12:17:07.009750 26 days 03:15:52.876500 36 days 03:05:30.682250 228 days 21:13:48.960000
In [969]:
# Bar plot of the median approval delay (whole days) per month for the
# non-outlier amount group, one panel per branch ('Sucursal ').

stats = (
    df.groupby(['Fecha_de_la_transacción_year_month', 'outlier_label', 'Sucursal '])
    ['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)

# '50%' is the median column produced by describe(); 'count' is the group size.
stats = stats.rename(columns={'50%': 'Dias', 'count': 'N° Registros'})

# The x aesthetic previously carried a stray trailing space
# ('..._year_month '), which matches no column of `stats`; use the exact name.
(
    ggplot(
        stats[stats['outlier_label'] == 'no outlier'],
        aes(x='Fecha_de_la_transacción_year_month', y='Dias'),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(panel_spacing_x=1, panel_spacing_y=0.5, aspect_ratio=0.6)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[969]:
<ggplot: (188938590929)>
In [970]:
# Bar plot of the median approval delay (whole days) per month for the
# outlier amount group, one panel per branch ('Sucursal ').

# The x aesthetic previously carried a stray trailing space
# ('..._year_month '), which matches no column of `stats`; use the exact name.
(
    ggplot(
        stats[stats['outlier_label'] != 'no outlier'],
        aes(x='Fecha_de_la_transacción_year_month', y='Dias'),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(panel_spacing_x=1, panel_spacing_y=0.5, aspect_ratio=0.6)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[970]:
<ggplot: (188941366116)>
In [971]:
# Record volume versus median approval delay per branch, one panel per
# (month, outlier / non-outlier amount group).

(
    ggplot(stats, aes(x='N° Registros', y='Sucursal ', color='Dias'))
    + geom_point(aes(size='N° Registros'))
    + xlab('N° registros')
    + ylab('Sucursal')
    + facet_wrap(['Fecha_de_la_transacción_year_month', 'outlier_label'], ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[971]:
<ggplot: (188945653944)>
In [972]:
# Box plot of the approval delay (days) per branch, coloured by
# outlier / non-outlier amount group, one panel per month.

# The facet variable previously carried a stray trailing space
# ('..._year_month '), which matches no column of `df`; use the exact name.
(
    ggplot(
        df,
        aes(
            x='Sucursal ',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='outlier_label',
        ),
    )
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fecha_de_la_transacción_year_month', ncol=2)
    + coord_flip()
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=.5,
        aspect_ratio=2,
        axis_text_x=element_text(rotation=75, hjust=1),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
Out[972]:
<ggplot: (188948863208)>
In [973]:
# Box plot of the approval delay (days) per branch; same data as the previous
# plot but with the roles swapped: colour = month, panel = amount group.
(
    ggplot(
        df,
        aes(
            x='Sucursal ',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='Fecha_de_la_transacción_year_month',
        ),
    )
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Tiempo (dias)')
    + facet_wrap('outlier_label', ncol=2, scales='free')
    + coord_flip()
    + theme_bw()
    + theme(
        panel_spacing_x=3,
        panel_spacing_y=.5,
        aspect_ratio=4,
        axis_text_x=element_text(rotation=75, hjust=1),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
Out[973]:
<ggplot: (188938591647)>
In [974]:
# Zoom on the non-outlier amount group: approval delay per branch,
# coloured by month.
(
    ggplot(
        df.loc[df['outlier'] == False],
        aes(
            x='Sucursal ',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='Fecha_de_la_transacción_year_month',
        ),
    )
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Tiempo (Dias)')
    + coord_flip()
    + theme_bw()
    + theme(aspect_ratio=1.2)
)
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
Out[974]:
<ggplot: (188946066856)>
In [975]:
# Zoom on the outlier amount group: approval delay per branch,
# coloured by month.
(
    ggplot(
        df.loc[df['outlier'] == True],
        aes(
            x='Sucursal ',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='Fecha_de_la_transacción_year_month',
        ),
    )
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Tiempo (Dias)')
    + coord_flip()
    + theme_bw()
    + theme(aspect_ratio=1.2)
)
Out[975]:
<ggplot: (188980201518)>

Tiempo en grupo monto atipico / no atipico por Categoria

In [976]:
# Descriptive statistics of the approval delay per category, month and
# outlier / non-outlier amount group; only the first 5 rows are shown.
stats = (
    df.groupby(['Categoría ', 'Fecha_de_la_transacción_year_month', 'outlier_label'])
    ['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(5)
Out[976]:
Categoría Fecha_de_la_transacción_year_month outlier_label count mean std min 25% 50% 75% max
0 Aceite Hidraulico 2022-1 no outlier 4 3 days 09:59:02.405000 1 days 09:38:23.331365018 1 days 21:24:30.983000 2 days 09:25:18.118250 3 days 13:26:20.508500 4 days 14:00:04.795250 4 days 15:38:57.620000
1 Aceite Hidraulico 2022-2 no outlier 4 2 days 00:00:15.409250 1 days 22:58:42.884627810 0 days 16:34:35.127000 0 days 19:51:23.124000 1 days 05:07:30.376500 2 days 09:16:22.661750 4 days 21:11:25.757000
2 Aceite Motor 2022-1 no outlier 8 4 days 00:37:36.423125 1 days 20:11:34.060700196 0 days 21:59:31.997000 3 days 02:29:40.134000 4 days 13:39:26.703500 5 days 13:55:16.872000 5 days 16:30:34.777000
3 Aceite Motor 2022-2 no outlier 3 4 days 11:12:35.054666666 4 days 00:18:16.952125675 0 days 15:38:27.007000 2 days 08:59:44.133500 4 days 02:21:01.260000 6 days 08:59:39.078500 8 days 15:38:16.897000
4 Alojamientos 2022-1 no outlier 3 12 days 14:10:01.822000 8 days 17:01:53.336304872 6 days 15:37:40.023000 7 days 14:11:15.778000 8 days 12:44:51.533000 15 days 13:26:12.721500 22 days 14:07:33.910000
In [977]:
# Categories with the largest median approval delay per month, for the outlier
# and non-outlier amount groups; only the first 5 rows are shown.
stats = (
    df.groupby(['Categoría ', 'Fecha_de_la_transacción_year_month', 'outlier_label'])
    ['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)

# Sort each month by the median ('50%') in descending order, renumber the rows,
# then keep the first 10 rows of every (month, outlier_label) pair.
stats_top = (
    stats.groupby('Fecha_de_la_transacción_year_month')
    .apply(lambda month_rows: month_rows.sort_values(['50%'], ascending=False))
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
stats_top.head(5)
Out[977]:
Categoría Fecha_de_la_transacción_year_month outlier_label count mean std min 25% 50% 75% max
Index
0 Repuestos y mantto equipos - Veh. pesados 2022-1 outlier 3.000000000 70.333333333 56.580326381 5.000000000 54.000000000 103.000000000 103.000000000 103.000000000
1 Resto de coste indirecto - Multas 2022-1 outlier 1.000000000 91.000000000 NaN 91.000000000 91.000000000 91.000000000 91.000000000 91.000000000
2 Repuestos equipos y maquinaria 2022-1 no outlier 1.000000000 63.000000000 NaN 63.000000000 63.000000000 63.000000000 63.000000000 63.000000000
3 Materiales seguridad (Lentes, mascara, chaleco... 2022-1 no outlier 2.000000000 48.500000000 60.104076401 6.000000000 27.250000000 48.500000000 69.750000000 91.000000000
4 Gastos de formacion y capacitaciones 2022-1 no outlier 2.000000000 45.500000000 20.506096654 31.000000000 38.250000000 45.500000000 52.750000000 60.000000000
In [978]:
# Bar plot of the top categories by median approval delay, one panel per
# (month, outlier / non-outlier amount group).
# Each bar label is "<category> - <month> - <group>"; the labels are turned
# into a categorical whose category order is the reversed row order of stats_top.
categoria_cat = (
    stats_top['Categoría ']
    + ' - '
    + stats_top['Fecha_de_la_transacción_year_month']
    + ' - '
    + stats_top['outlier_label']
)
categoria_list = categoria_cat.tolist()[::-1]
categoria_cat = pd.Categorical(categoria_cat, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)

(
    ggplot(stats_top_aux, aes(x='categoria_cat', y='50%'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Tiempo (dias)')
    + facet_wrap(['Fecha_de_la_transacción_year_month', 'outlier_label'], scales='free', ncol=1)
    + coord_flip()
    + theme_bw()
    + theme(legend_position='none')
    + theme(panel_spacing_y=1, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[978]:
<ggplot: (188949446479)>
In [979]:
# Categories that appear in the monthly top 10 (by median approval delay) in
# every month, computed separately for the outlier / non-outlier amount groups.

# Sort each month by the median ('50%') in descending order, renumber the rows,
# then keep the first 10 rows of every (month, outlier_label) pair.
stats_top = (
    stats.groupby('Fecha_de_la_transacción_year_month')
    .apply(lambda month_rows: month_rows.sort_values(['50%'], ascending=False))
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique())

# Number of months in which each (category, outlier_label) pair made the top 10.
categoria_count = stats_top.groupby(['Categoría ', 'outlier_label'])['Categoría '].count().sort_values(ascending=False)

# Keep only the pairs that are present in every month.
categoria_count = categoria_count[categoria_count == n_month].index.to_list()

# Build the (category, outlier_label) lookup key on a NEW frame: the original
# `df_aux = df` was only an alias, so the helper column leaked into `df`.
df_aux = df.assign(
    list_of_tuples=list(zip(df['Categoría '].to_list(), df['outlier_label'].to_list()))
)

df_top_categorias = df_aux[df_aux['list_of_tuples'].isin(categoria_count)]
In [980]:
# Median approval delay of the categories that appear in the top every month,
# one panel per outlier / non-outlier amount group.

stats = (
    df_top_categorias.groupby(['Categoría ', 'outlier_label'])
    ['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)

(
    ggplot(stats, aes(x='Categoría ', y='50%'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Tiempo (dias)')
    + facet_wrap('outlier_label', ncol=1, scales='free')
    + coord_flip()
    + theme_bw()
    + guides(size=False)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[980]:
<ggplot: (188949391775)>
In [981]:
# Record volume versus median approval delay for the categories that appear in
# the top every month, one panel per outlier / non-outlier amount group.

# '50%' is the median column produced by describe(); 'count' is the group size.
stats = stats.rename(columns={'50%': 'Dias', 'count': 'N° Registros'})

(
    ggplot(stats, aes(x='N° Registros', y='Categoría ', color='Dias'))
    + geom_point(aes(size='N° Registros'))
    + xlab('N° registros')
    + ylab('Categoria')
    + facet_wrap('outlier_label', ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[981]:
<ggplot: (188943647060)>
In [982]:
# Box plot of the approval delay for records whose category appears in the top
# every month, one panel per outlier / non-outlier amount group.
(
    ggplot(
        df_top_categorias,
        aes(x='Categoría ', y='delta_Fecha_aprobación_transacción_day_int'),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Categoria')
    + ylab('Tiempo (dias)')
    + facet_wrap('outlier_label', ncol=2)
    + coord_flip()
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=.5,
        aspect_ratio=2,
        axis_text_x=element_text(rotation=75, hjust=1),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[982]:
<ggplot: (188933654706)>

Tiempo en grupo monto atipico / no atipico por Proveedor

In [983]:
# Descriptive statistics of the approval delay per provider, month and
# outlier / non-outlier amount group; only the first 5 rows are shown.
stats = (
    df.groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month', 'outlier_label'])
    ['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(5)
Out[983]:
Proveedor Fecha_de_la_transacción_year_month outlier_label count mean std min 25% 50% 75% max
0 0 2022-1 no outlier 4 3 days 23:43:18.438250 2 days 16:23:16.663693644 1 days 15:02:04.650000 2 days 15:34:06.047250 3 days 05:55:43.390000 4 days 14:04:55.781000 7 days 19:59:42.323000
1 12636468-7 2022-2 no outlier 1 34 days 16:50:17.367000 NaT 34 days 16:50:17.367000 34 days 16:50:17.367000 34 days 16:50:17.367000 34 days 16:50:17.367000 34 days 16:50:17.367000
2 14324088-6 2022-2 no outlier 1 21 days 16:50:17.250000 NaT 21 days 16:50:17.250000 21 days 16:50:17.250000 21 days 16:50:17.250000 21 days 16:50:17.250000 21 days 16:50:17.250000
3 24100 2022-1 no outlier 1 7 days 21:25:58.807000 NaT 7 days 21:25:58.807000 7 days 21:25:58.807000 7 days 21:25:58.807000 7 days 21:25:58.807000 7 days 21:25:58.807000
4 2DO JUZGADO POLICIA LOCAL ( ABONAR A CLAUDIO O... 2022-1 no outlier 1 36 days 21:26:56.093000 NaT 36 days 21:26:56.093000 36 days 21:26:56.093000 36 days 21:26:56.093000 36 days 21:26:56.093000 36 days 21:26:56.093000
In [984]:
# Top providers by median approval-minus-transaction delay (whole days) for
# each transaction month and outlier group (preview: first 5 rows).

# Descriptive statistics of the delay per provider / month / outlier group.
stats = (
    df.groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month', 'outlier_label'])
    ['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)

# Order rows by month (ascending) and, inside each month, by median ('50%')
# descending, then keep the first 10 rows of every (month, outlier group).
# A plain two-key sort replaces the former groupby(...).apply(sort_values):
# it does not depend on `apply` handing the grouping column back (deprecated
# in recent pandas) and its tie order is stable (ties keep the order they
# have in `stats`).
stats_top = (
    stats
    .sort_values(['Fecha_de_la_transacción_year_month', '50%'], ascending=[True, False])
    .reset_index(drop=True)   # fresh 0..n-1 positions in the sorted order
    .rename_axis('Index')     # keep the index name the original cell produced
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
stats_top.head(5)
Out[984]:
Proveedor Fecha_de_la_transacción_year_month outlier_label count mean std min 25% 50% 75% max
Index
0 Ruta La Araucanía S.A. 2022-1 no outlier 1.000000000 208.000000000 NaN 208.000000000 208.000000000 208.000000000 208.000000000 208.000000000
1 China King 2022-1 no outlier 1.000000000 207.000000000 NaN 207.000000000 207.000000000 207.000000000 207.000000000 207.000000000
2 Claudio DOugnac 2022-1 outlier 1.000000000 184.000000000 NaN 184.000000000 184.000000000 184.000000000 184.000000000 184.000000000
3 CENCOSUD EASY 2022-1 no outlier 1.000000000 140.000000000 NaN 140.000000000 140.000000000 140.000000000 140.000000000 140.000000000
4 Eco kleen car wash 2022-1 no outlier 1.000000000 117.000000000 NaN 117.000000000 117.000000000 117.000000000 117.000000000 117.000000000
In [985]:
# Bar chart of the median delay ('50%') of the top providers, one facet per
# (month, outlier group).

# One text label per row of stats_top: "<provider> - <month> - <outlier group>".
categoria_labels = (
    stats_top['Proveedor ']
    + ' - ' + stats_top['Fecha_de_la_transacción_year_month']
    + ' - ' + stats_top['outlier_label']
)

# Category order is the reversed row order of stats_top
# (presumably so the first rows end up on top once the axes are flipped).
categoria_list = categoria_labels.tolist()[::-1]
categoria_cat = pd.Categorical(categoria_labels, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)

(
    ggplot(stats_top_aux, aes(y='50%', x='categoria_cat'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Tiempo (dias)')
    + facet_wrap(
        ['Fecha_de_la_transacción_year_month', 'outlier_label'],
        scales='free',
        ncol=1,
    )
    + coord_flip()
    + theme_bw()
    + theme(legend_position='none')
    + theme(panel_spacing_y=1, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
Out[985]:
<ggplot: (188938644769)>
In [986]:
# Providers that appear in the monthly top-10 (by median delay) in every
# month, per outlier group, and the raw records that belong to them.
# Relies on `stats` holding the per provider / month / outlier-group
# describe() table (columns '50%' and 'Fecha_de_la_transacción_year_month').

# Top 10 rows per (month, outlier group), ranked by median ('50%') descending.
# A two-key sort replaces the former groupby(...).apply(sort_values): it does
# not depend on `apply` returning the grouping column (deprecated in recent
# pandas) and breaks ties deterministically.
stats_top = (
    stats
    .sort_values(['Fecha_de_la_transacción_year_month', '50%'], ascending=[True, False])
    .reset_index(drop=True)
    .rename_axis('Index')
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)

# Number of distinct months present in the top table.
n_month = stats_top['Fecha_de_la_transacción_year_month'].nunique()

# How many monthly tops each (provider, outlier group) pair shows up in.
categoria_count = (
    stats_top
    .groupby(['Proveedor ', 'outlier_label'])['Proveedor ']
    .count()
    .sort_values(ascending=False)
)

# Keep only the pairs present in every month, as a list of
# (provider, outlier_label) tuples.
categoria_count = categoria_count[categoria_count == n_month].index.to_list()

# Tag every record with its (provider, outlier_label) pair. `assign` works on
# a copy, so the shared `df` is no longer mutated by this cell (the original
# `df_aux = df` was only an alias).
df_aux = df.assign(
    list_of_tuples=list(zip(df['Proveedor '].to_list(), df['outlier_label'].to_list()))
)

# Records whose (provider, outlier group) pair is in the recurring set.
df_top_categorias = df_aux[df_aux['list_of_tuples'].isin(categoria_count)]
In [987]:
# Median delay (whole days) of the providers that appear in the top every
# month, one facet per outlier group.

# Descriptive statistics of the delay per provider and outlier group,
# restricted to the records selected in df_top_categorias.
stats = (
    df_top_categorias
    .groupby(['Proveedor ', 'outlier_label'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)

(
    ggplot(stats, aes(y='50%', x='Proveedor '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Tiempo (dias)')
    + facet_wrap('outlier_label', ncol=1, scales='free')
    + coord_flip()
    + theme_bw()
    + guides(size=False)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[987]:
<ggplot: (188939410429)>
In [988]:
# Record volume and median approval-minus-transaction delay of the providers
# that appear in the top every month, one facet per outlier group.

# Friendlier column names for the legend: median -> 'Dias', count -> 'N° Registros'.
stats = stats.rename(columns={'50%': 'Dias', 'count': 'N° Registros'})

# The y axis maps the provider column, so it is labelled 'Proveedor'
# (it was previously labelled 'Sucursal', which did not match the data).
(
    ggplot(stats, aes(x='N° Registros', y='Proveedor ', color='Dias'))
    + geom_point(aes(size='N° Registros'))
    + xlab('N° registros')
    + ylab('Proveedor')
    + facet_wrap('outlier_label', ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[988]:
<ggplot: (188933659832)>
In [989]:
# Boxplot of the approval-minus-transaction delay (whole days) for the records
# of the providers that appear in the top every month, one facet per outlier
# group.
# The x aesthetic is the provider column, so the axis is labelled 'Proveedor'
# (it was previously labelled 'Sucursal', which did not match the data).
(
    ggplot(
        df_top_categorias,
        aes(x='Proveedor ', y='delta_Fecha_aprobación_transacción_day_int'),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Proveedor')
    + ylab('Tiempo (dias)')
    + facet_wrap('outlier_label', ncol=2)
    + coord_flip()
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Out[989]:
<ggplot: (188945593477)>